Dissertation Appendix C

Chapter 2: Regression based random forest modeling of inland Pacific Northwest (iPNW) drought-related wheat insurance loss, using time lagged climate correlation matrix association

Appendix C documents Chapter 2 analyses related to agricultural commodity loss, at a county level, from 2001-2015, across the 24 county region of the inland Pacific Northwest (iPNW). In this analysis, we use a time lagged matrix approach to examine a range of combinations of climate and insurance loss for wheat due to drought.

Our analysis can be described in 5 specific steps:

Step 1. We subset insurance loss claims by wheat due to drought

Step 2. We aggregate climate data by month and county for the 24 county study area

Step 3. We construct a design matrix of all monthly climate combinations, per county and per climate variable.

Step 4. We select the monthly combination that has the highest correlation with wheat/drought claims, per county, for each year (2001 to 2015). Based on the output, we assemble an optimized climate/insurance loss dataset for all counties.

Step 5. We examine conditional relationships of climate to insurance loss using regression based random forest analysis

Step 1. Examining wheat/drought acreage ratios compared to average precipitation, potential evapotranspiration, and aridity for the iPNW study area, 2001 to 2015

In step 1, we perform a broad examination of wheat insurance loss due to drought for our 24 county iPNW study area, by calculating the ratio of wheat/drought acreage claims vs. all other wheat related damage cause claims, and comparing that data to individual climate variables. Example: For Whitman County, WA - we calculated the total amount of insurance claim acreage for wheat due to drought (for 2001 to 2015 combined), and divided that by the total amount of wheat insurance acreage for all other damage causes (excessive moisture, hail, frost, freeze, etc). We did this for each of the 24 counties.

We then calculated a summary value for each climate variable (plus aridity - which is PR / PET), for each county, from 2001 to 2015. We then constructed three scatterplots for comparison. Each point represents a county. For precipitation and potential evapotranspiration, we use the average annual total.

ipnw_climate_county_comparison <- function(var_commodity, var_damage) {
  # Compare county-level insurance loss acreage ratios for one commodity /
  # damage-cause pair against 2001-2015 mean climate (precipitation, PET,
  # and the PR/PET aridity index) for the 24-county iPNW study area.
  #
  # Args:
  #   var_commodity: commodity name as it appears in the RMA summary files
  #                  (e.g. "WHEAT").
  #   var_damage:    damage cause of interest (e.g. "Drought").
  #
  # Side effects: changes the working directory (data paths are hard coded
  # to the /dmine data store) and draws a 1x3 panel of scatterplots.

  library(plotrix)
  library(ggplot2)
  library(gridExtra)

  # Read every csv matching `pattern` in `dir` and stack the rows.
  read_summaries <- function(dir, pattern = "_summary") {
    setwd(dir)
    files <- list.files(pattern = pattern)
    do.call(rbind, lapply(files, read.csv, header = TRUE))
  }

  # County-level climate means: sum within county/year, then average years.
  climate_county_means <- function(dir) {
    raw <- read_summaries(dir)
    by_year <- aggregate(. ~ countyfips + year, raw, sum)
    aggregate(. ~ countyfips, by_year, mean)
  }

  ID1 <- climate_county_means("/dmine/data/USDA/agmesh-scenarios/Idaho/summaries")
  WA1 <- climate_county_means("/dmine/data/USDA/agmesh-scenarios/Washington/summaries")
  OR1 <- climate_county_means("/dmine/data/USDA/agmesh-scenarios/Oregon/summaries")

  # Attach county name and state abbreviation via the FIPS lookup table.
  countiesfips <- read.csv("/dmine/data/counties/counties_fips.csv", header = TRUE)
  colnames(countiesfips) <- c("countyfips", "county", "state")

  ID2 <- merge(countiesfips, ID1, by = "countyfips")
  WA2 <- merge(countiesfips, WA1, by = "countyfips")
  OR2 <- merge(countiesfips, OR1, by = "countyfips")

  # Per-state loss summary joined to that state's climate means.  Returns one
  # row per county with: total acres across ALL damage causes (denominator of
  # the acreage ratio), total loss/acres for the requested damage cause, loss
  # per acre, and the climate columns.
  state_loss_climate <- function(state_name, climate_df) {
    raw <- read_summaries("/dmine/data/USDA/agmesh-scenarios/Allstates/",
                          paste(state_name, "_summary", sep = ""))
    xrange <- as.data.frame(raw)
    xrange$county <- as.character(xrange$county)
    xrange$damagecause <- as.character(xrange$damagecause)

    # Restrict to the 2001-2015 study window.
    xrange <- subset(xrange, year >= 2001)

    xrange$commodity <- trimws(xrange$commodity)
    xrange$county <- trimws(xrange$county)
    xrange$damagecause <- trimws(xrange$damagecause)

    # Acres for the commodity across all damage causes.
    commodity_rows <- subset(xrange, commodity == var_commodity)
    loss_all <- aggregate(commodity_rows$acres,
                          by = list(commodity_rows$county, commodity_rows$state),
                          FUN = "sum")
    colnames(loss_all) <- c("county", "state", "all_damagecause_acres")

    # Loss and acres for the single requested damage cause.
    cause_rows <- subset(xrange,
                         commodity == var_commodity & damagecause == var_damage)
    loss <- aggregate(cause_rows$loss,
                      by = list(cause_rows$county, cause_rows$state),
                      FUN = "sum")
    acres <- aggregate(cause_rows$acres,
                       by = list(cause_rows$county, cause_rows$state),
                       FUN = "sum")
    colnames(loss) <- c("county", "state", "loss")
    colnames(acres) <- c("county", "state", "acres")

    loss$acres <- acres$acres
    loss$lossperacre <- loss$loss / loss$acres

    loss_climate <- merge(loss, climate_df, by = c("county", "state"))
    merge(loss_all, loss_climate, by = c("county", "state"))
  }

  ID_loss_climate_2 <- state_loss_climate("Idaho", ID2)
  WA_loss_climate_2 <- state_loss_climate("Washington", WA2)
  OR_loss_climate_2 <- state_loss_climate("Oregon", OR2)

  # Merge all three states.
  iPNW_loss_climate <- rbind(OR_loss_climate_2, ID_loss_climate_2,
                             WA_loss_climate_2)

  # Subset to the 24 iPNW counties.  Franklin is extracted first and
  # re-attached afterwards so that only Franklin, WA is kept (the county
  # name alone is ambiguous across the three states).
  Franklin <- subset(iPNW_loss_climate, county == "Franklin" & state == "WA")

  ipnw_counties <- c("Benewah", "Latah", "Nez Perce", "Lewis", "Idaho",
                     "Wasco", "Sherman", "Gilliam", "Morrow", "Umatilla",
                     "Union", "Wallowa", "Douglas", "Walla Walla", "Benton",
                     "Columbia", "Asotin", "Garfield", "Grant", "Whitman",
                     "Spokane", "Lincoln", "Adams")
  iPNW_loss_climate <- subset(iPNW_loss_climate, county %in% ipnw_counties)
  iPNW_loss_climate <- rbind(iPNW_loss_climate, Franklin)

  # Ratio: damage-cause acreage / acreage for all damage causes.
  iPNW_loss_climate$pct_acreage <-
    iPNW_loss_climate$acres / iPNW_loss_climate$all_damagecause_acres

  # 1x3 panel layout (base-graphics device settings retained as in the
  # original analysis script).
  par(mfrow = c(1, 3), mar = c(11.8, 6.1, 4, 2.1), family = "serif",
      mgp = c(4, 1, 0), las = 0)
  layout(matrix(c(1, 2, 3), 1, 3, byrow = TRUE))

  # Precipitation: monthly means scaled to annual totals (* 12).
  iPNW_loss_climate_pr <- iPNW_loss_climate[order(-iPNW_loss_climate$pr), ]
  iPNW_loss_climate_pr$pr_year <- iPNW_loss_climate_pr$pr * 12

  pr <- ggplot(iPNW_loss_climate_pr, aes(x = pr_year, y = pct_acreage)) +
    geom_point() +
    geom_smooth(method = "loess") +
    xlab("Annual Precipitation (mm)") +
    ylab("Percentage Drought Wheat Acreage Loss") +
    theme(text = element_text(size = 16, family = "serif"))

  # Potential evapotranspiration, also scaled to an annual total.
  iPNW_loss_climate_pet <- iPNW_loss_climate[order(iPNW_loss_climate$pet), ]
  iPNW_loss_climate_pet$pet_year <- iPNW_loss_climate_pet$pet * 12

  pet <- ggplot(iPNW_loss_climate_pet, aes(x = pet_year, y = pct_acreage)) +
    geom_point() +
    geom_smooth() +
    xlab("Annual PET (mm)") +
    ylab("Percentage Drought Wheat Acreage Loss") +
    theme(text = element_text(size = 16, family = "serif"))

  # Aridity index = annual PR / annual PET.
  iPNW_loss_climate_prpet <- iPNW_loss_climate
  iPNW_loss_climate_prpet$pet_year <- iPNW_loss_climate_prpet$pet * 12
  iPNW_loss_climate_prpet$pr_year <- iPNW_loss_climate_prpet$pr * 12
  iPNW_loss_climate_prpet$prpet <-
    iPNW_loss_climate_prpet$pr_year / iPNW_loss_climate_prpet$pet_year

  prpet <- ggplot(iPNW_loss_climate_prpet, aes(x = prpet, y = pct_acreage)) +
    geom_point() +
    geom_smooth() +
    xlab("Aridity Index") +
    ylab("Percentage Drought Wheat Acreage Loss") +
    theme(text = element_text(size = 16, family = "serif"))

  grid.arrange(pr, pet, prpet, nrow = 1)
}

# Run the Step 1 county-level comparison for wheat insurance losses
# attributed to drought.
ipnw_climate_county_comparison("WHEAT", "Drought")

Step 2. Construction of design matrices of all monthly climate combinations, per county and per climate variable (precipitation, maximum temperature, and potential evapotranspiration)

For the three examined climate variables (potential evapotranspiration, precipitation, maximum temperature), a design matrix was developed for each of the 24 counties that are part of the defined iPNW study area. For each county, an absolute correlation was calculated for each monthly combination for each climate variable to the total wheat insurance loss due to drought. The result is a design matrix map that identifies the monthly combination with the highest correlation. For example, for Whitman county, WA, for maximum temperature - the monthly combination with the highest correlation between max temperature and wheat insurance loss due to drought was May/June/July (denoted as July 2).

Step 2.1 Maximum temperature design matrix for Whitman County, WA, wheat insurance loss due to drought

  # Step 2.1: build the maximum-temperature (tmmx) time-lag design matrix for
  # Whitman County, WA, wheat insurance loss due to drought, and draw it as a
  # heatmap with the best-correlated month/lag cell reported in the title.
  state_var <- "WA"
  county_var <- "Whitman"
  commodity_var <- "WHEAT"
  damage_var <- "Drought"
  climate_var <- "tmmx"
  predictor_var <- "cube_root_loss"

  library(gplots)
  library(plyr)
  library(dplyr)

  # Cell labels for the 12x12 time-lag matrix: each lowercase month name is
  # paired with a lag code 1-12 (e.g. "jul2" = the 2-month window ending in
  # July).  144 labels total, matching the input file naming scheme.
  monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
  numlist <- as.data.frame(rep((1:12), times = 12))
  monthnumlist <- as.data.frame(cbind(monthlist, numlist))
  colnames(monthnumlist) <- c("month", "monthcode")
  monthnumlist$combined <- paste(monthnumlist$month, monthnumlist$monthcode, sep = "")
  climmonth <- monthnumlist$combined

  # Placeholder 12x12 matrix (kept for downstream code; not filled here).
  designmatrix <- matrix(NA, ncol = 12, nrow = 12)

  # One loss/climate correlation per month-lag combination (144 cells).
  dmvector <- as.data.frame(rep(NA, times = 144))

  cl <- 0
  for (ppp in climmonth) {
    cl <- cl + 1

    # Climate values for this month/lag combination for the county.
    setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_rev4")
    file1 <- read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_",
                            damage_var, "_", ppp, ".csv", sep = ""))
    climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))

    # Annual loss response for this county/commodity/damage cause.
    setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_summaries_rev3")
    file2 <- as.data.frame(read.csv(paste(state_var, "_", county_var, "_",
                                          commodity_var, "_", damage_var, "_",
                                          predictor_var, ".csv", sep = "")))
    file2 <- subset(file2, state == state_var)
    file2 <- subset(file2, county == county_var)
    colnames(file2) <- c("X", "year", "damagecause", "county", "state",
                         "commodity", predictor_var)

    # z-score of the climate column (column 3 after this assignment).  The
    # original `colnames(climatevar[3]) <- "zscore"` was a no-op (it renamed
    # a temporary copy); the name is set on the joined frame below instead.
    climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
    kor <- join(climatevar, file2, by = "year")
    kor2 <- subset(kor, damagecause != "NA")
    colnames(kor2)[3] <- "zscore"

    # Correlation between the raw climate column and the loss response.
    kor3 <- cor(kor2[1], kor2[9])

    dmvector[cl, ] <- kor3
  }

  dmvector <- as.data.frame(dmvector)
  colnames(dmvector) <- "correlations"

  # Reshape the 144 correlations to 12x12 (row-major), flip vertically, and
  # keep only the 9 rows (Jan-Sep anchor months) used in the analysis.
  dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
  dmvector2 <- dmvector2[nrow(dmvector2):1, ]
  dmvector3 <- dmvector2[4:12, ]

  # Blank the upper-triangle cells whose lag window extends past the anchor
  # month (row r keeps the first 12 - r columns).
  for (r in 1:9) {
    dmvector3[r, (13 - r):12] <- NA
  }

  # Flatten back to a 108-cell vector, bottom row (row 9) first, to line up
  # with the month/lag label table.
  dmvector <- as.vector(t(dmvector3[9:1, ]))
  dmvector <- data.frame(dmvector)

  # Pick the extreme cell.  Moisture-type variables correlate negatively with
  # drought loss, so take the minimum; temperature/PET-type take the maximum.
  # NOTE(review): any climate_var outside these two sets leaves `dmv`
  # undefined, matching the original nested if/else behavior.
  min_vars <- c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")
  max_vars <- c("tmmn", "pet", "tmmx")
  if (climate_var %in% min_vars) {
    dmv <- which.min(dmvector[1:108, 1])
  } else if (climate_var %in% max_vars) {
    dmv <- which.max(dmvector[1:108, 1])
  }

  dmv <- as.data.frame(dmv)
  colnames(dmv)[1] <- "row"

  # Map the winning flat index back to its month/lag label.
  monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
  colnames(monthnumlist2)[1] <- "row"
  monthnumlist3 <- subset(monthnumlist2, row == dmv$row)

  require(utils)
  # Grid coordinates of the winning cell (kept for annotation code below).
  cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row, ]

  monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")

  setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_pngs")

  lblcol <- c(9:1)
  # Rounded copy for optional cell labels; re-apply the NA mask after
  # rounding.
  dmvector3a <- round(dmvector3, digits = 2)
  for (r in 1:9) {
    dmvector3a[r, (13 - r):12] <- NA
  }

  # Global marker of the winning cell (used by the optional annotated
  # heatmap variant).
  newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row, ])

  my_palette <- colorRampPalette(c("lightyellow", "yellow", "orange",
                                   "darkorange", "red", "darkred"))(n = 32)

  nba_heatmap <- heatmap.2(dmvector3, Rowv = NA, Colv = NA, cexCol = 2,
                           cexRow = 2, colsep = 1:ncol(dmvector3),
                           rowsep = 1:nrow(dmvector3), col = my_palette,
                           symbreaks = FALSE, scale = "none",
                           dendrogram = "none", trace = "none",
                           labRow = rev(monthzzz), tracecol = "black",
                           key = TRUE, notecol = "black", notecex = 1.5,
                           key.ylab = "", key.xlab = "loss correlation range",
                           key.title = "",
                           main = paste(climate_var, " vs ", predictor_var,
                                        "\n", county_var, " County ",
                                        state_var, " ", damage_var, "\n",
                                        "MAX Correlation in cell: ",
                                        monthnumlist3$combined, sep = ""))

Step 2.2. Potential Evapotranspiration design matrix for Whitman County, WA, wheat insurance loss due to drought

  # Step 2.2: build the potential-evapotranspiration (pet) time-lag design
  # matrix for Whitman County, WA, wheat insurance loss due to drought, and
  # draw it as a heatmap with the best-correlated cell reported in the title.
  state_var <- "WA"
  county_var <- "Whitman"
  commodity_var <- "WHEAT"
  damage_var <- "Drought"
  climate_var <- "pet"
  predictor_var <- "cube_root_loss"

  library(gplots)
  library(plyr)
  library(dplyr)

  # Cell labels for the 12x12 time-lag matrix: each lowercase month name is
  # paired with a lag code 1-12 (e.g. "jul2" = the 2-month window ending in
  # July).  144 labels total, matching the input file naming scheme.
  monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
  numlist <- as.data.frame(rep((1:12), times = 12))
  monthnumlist <- as.data.frame(cbind(monthlist, numlist))
  colnames(monthnumlist) <- c("month", "monthcode")
  monthnumlist$combined <- paste(monthnumlist$month, monthnumlist$monthcode, sep = "")
  climmonth <- monthnumlist$combined

  # Placeholder 12x12 matrix (kept for downstream code; not filled here).
  designmatrix <- matrix(NA, ncol = 12, nrow = 12)

  # One loss/climate correlation per month-lag combination (144 cells).
  dmvector <- as.data.frame(rep(NA, times = 144))

  cl <- 0
  for (ppp in climmonth) {
    cl <- cl + 1

    # Climate values for this month/lag combination for the county.
    setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_rev4")
    file1 <- read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_",
                            damage_var, "_", ppp, ".csv", sep = ""))
    climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))

    # Annual loss response for this county/commodity/damage cause.
    setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_summaries_rev3")
    file2 <- as.data.frame(read.csv(paste(state_var, "_", county_var, "_",
                                          commodity_var, "_", damage_var, "_",
                                          predictor_var, ".csv", sep = "")))
    file2 <- subset(file2, state == state_var)
    file2 <- subset(file2, county == county_var)
    colnames(file2) <- c("X", "year", "damagecause", "county", "state",
                         "commodity", predictor_var)

    # z-score of the climate column (column 3 after this assignment).  The
    # original `colnames(climatevar[3]) <- "zscore"` was a no-op (it renamed
    # a temporary copy); the name is set on the joined frame below instead.
    climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
    kor <- join(climatevar, file2, by = "year")
    kor2 <- subset(kor, damagecause != "NA")
    colnames(kor2)[3] <- "zscore"

    # Correlation between the raw climate column and the loss response.
    kor3 <- cor(kor2[1], kor2[9])

    dmvector[cl, ] <- kor3
  }

  dmvector <- as.data.frame(dmvector)
  colnames(dmvector) <- "correlations"

  # Reshape the 144 correlations to 12x12 (row-major), flip vertically, and
  # keep only the 9 rows (Jan-Sep anchor months) used in the analysis.
  dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
  dmvector2 <- dmvector2[nrow(dmvector2):1, ]
  dmvector3 <- dmvector2[4:12, ]

  # Blank the upper-triangle cells whose lag window extends past the anchor
  # month (row r keeps the first 12 - r columns).
  for (r in 1:9) {
    dmvector3[r, (13 - r):12] <- NA
  }

  # Flatten back to a 108-cell vector, bottom row (row 9) first, to line up
  # with the month/lag label table.
  dmvector <- as.vector(t(dmvector3[9:1, ]))
  dmvector <- data.frame(dmvector)

  # Pick the extreme cell.  Moisture-type variables correlate negatively with
  # drought loss, so take the minimum; temperature/PET-type take the maximum.
  # NOTE(review): any climate_var outside these two sets leaves `dmv`
  # undefined, matching the original nested if/else behavior.
  min_vars <- c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")
  max_vars <- c("tmmn", "pet", "tmmx")
  if (climate_var %in% min_vars) {
    dmv <- which.min(dmvector[1:108, 1])
  } else if (climate_var %in% max_vars) {
    dmv <- which.max(dmvector[1:108, 1])
  }

  dmv <- as.data.frame(dmv)
  colnames(dmv)[1] <- "row"

  # Map the winning flat index back to its month/lag label.
  monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
  colnames(monthnumlist2)[1] <- "row"
  monthnumlist3 <- subset(monthnumlist2, row == dmv$row)

  require(utils)
  # Grid coordinates of the winning cell (kept for annotation code below).
  cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row, ]

  monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")

  setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_pngs")

  lblcol <- c(9:1)
  # Rounded copy for optional cell labels; re-apply the NA mask after
  # rounding.
  dmvector3a <- round(dmvector3, digits = 2)
  for (r in 1:9) {
    dmvector3a[r, (13 - r):12] <- NA
  }

  # Global marker of the winning cell (used by the optional annotated
  # heatmap variant).
  newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row, ])

  my_palette <- colorRampPalette(c("lightyellow", "yellow", "orange",
                                   "darkorange", "red", "darkred"))(n = 32)

  nba_heatmap <- heatmap.2(dmvector3, Rowv = NA, Colv = NA, cexCol = 2,
                           cexRow = 2, colsep = 1:ncol(dmvector3),
                           rowsep = 1:nrow(dmvector3), col = my_palette,
                           symbreaks = FALSE, scale = "none",
                           dendrogram = "none", trace = "none",
                           labRow = rev(monthzzz), tracecol = "black",
                           key = TRUE, notecol = "black", notecex = 1.5,
                           key.ylab = "", key.xlab = "loss correlation range",
                           key.title = "",
                           main = paste(climate_var, " vs ", predictor_var,
                                        "\n", county_var, " County ",
                                        state_var, " ", damage_var, "\n",
                                        "MAX Correlation in cell: ",
                                        monthnumlist3$combined, sep = ""))

Step 2.3. Precipitation design matrix for Whitman County, WA, wheat insurance loss due to drought

  # Scenario selection for this design matrix: precipitation ("pr") versus
  # the cube-root-transformed wheat/drought insurance loss for Whitman
  # County, WA. These file-level variables drive the CSV file names read in
  # the correlation loop below.
  state_var <- "WA"
  county_var <- "Whitman"
  commodity_var <- "WHEAT"
  damage_var <- "Drought"
  climate_var <- "pr"
  predictor_var <- "cube_root_loss"
  
  library(gplots)
  library(plyr)
  library(dplyr)
  #library(plotly)
  #packageVersion('plotly')
  
  #response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
  
  # Build the 144-entry month/lag lookup ("jan1".."dec12") that names the
  # climate CSV files and maps onto the 12x12 design matrix.
  # monthlist: jan-dec, each month name repeated 12 times (144 rows).
  monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
  # numlist: lag codes 1-12, cycled 12 times (144 rows).
  numlist <- as.data.frame(rep(1:12, times = 12))
  # Pair every month with every lag code; cbind of two data frames is
  # already a data frame (dropped the redundant as.data.frame wrapper).
  monthnumlist <- cbind(monthlist, numlist)
  colnames(monthnumlist) <- c("month", "monthcode")
  # Combined label, e.g. "jan1" -- matches the climate CSV file suffixes.
  monthnumlist$combined <- paste0(monthnumlist$month, monthnumlist$monthcode)
  climmonth <- monthnumlist$combined
  
  # Placeholder 12x12 matrix (kept for parity with downstream code).
  designmatrix <- matrix(NA, ncol = 12, nrow = 12)
  
  # Empty length-144 holder, filled below with loss/climate correlations.
  dmvector <- as.data.frame(rep(NA, times = 144))
  
  cl <- 0  # linear index into dmvector (1..144), one slot per month/lag combo
  for (ppp in climmonth) {
    cl <- cl + 1
    
    # Monthly aggregated climate values for this month/lag combination.
    setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_rev4")
    file1 <- read.csv(paste0(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv"))
    # Column 1 = the climate variable, column 2 = year (the join key below).
    climatevar <- as.data.frame(cbind(file1[climate_var], file1[2]))
    
    # Annual insurance-loss summaries, filtered to this state/county.
    setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_summaries_rev3")
    file2 <- as.data.frame(read.csv(paste0(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv")))
    file2 <- subset(file2, state == state_var)
    file2 <- subset(file2, county == county_var)
    colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var)
    
    # Standardize the climate variable (z-score column appended as column 3).
    climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
    # fixed: original wrote colnames(climatevar[3]) <- "zscore", which renames
    # a copy and is a no-op; this renames the actual column.
    colnames(climatevar)[3] <- "zscore"
    kor <- join(climatevar, file2, by = "year")
    kor2 <- subset(kor, damagecause != "NA")
    colnames(kor2)[3] <- "zscore"
    # kor2[9] <- as.numeric(kor2$zscore)
    # Correlation of the raw climate value (col 1) with the loss response.
    # NOTE(review): column 9 is positional -- confirm it is the predictor_var
    # column of the joined frame.
    kor3 <- cor(kor2[1], kor2[9])
    
    # Store this combination's correlation in the 144-slot vector.
    dmvector[cl, ] <- kor3
  }
  
  dmvector <- as.data.frame(dmvector)
  colnames(dmvector) <- "correlations"
  
  # 144 correlations -> 12x12 matrix (filled by row), flipped vertically so
  # the first input row becomes the bottom row, then trimmed to the 9 rows
  # for ending months Jan-Sep.
  dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
  dmvector2 <- dmvector2[nrow(dmvector2):1, ]
  dmvector3 <- dmvector2[4:12, ]
  
  # Blank invalid month/lag combinations (the averaging window would run
  # past the ending month): row r keeps only columns 1:(12 - r).
  # (Replaces the nine unrolled NA assignments.)
  for (r in 1:9) {
    dmvector3[r, (13 - r):12] <- NA
  }
  
  # Re-linearize row-by-row from the bottom row up (rows 9..1) -- identical
  # to c(dmvector3[9,], dmvector3[8,], ..., dmvector3[1,]) -- matching the
  # cell ordering the min/max lookup below expects.
  dmvector <- data.frame(dmvector = as.vector(t(dmvector3[9:1, ])))

  # Select the "best" cell of the design matrix. For moisture-type variables
  # drought loss is expected to correlate negatively, so take the minimum
  # correlation; for the temperature/evapotranspiration variables take the
  # maximum. (Replaces a nine-level nested if/else chain.)
  # NOTE(review): an unlisted climate_var leaves dmv unset, exactly as the
  # original chain did.
  min_cor_vars <- c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")
  max_cor_vars <- c("tmmn", "pet", "tmmx")
  if (climate_var %in% min_cor_vars) {
    dmv <- which.min(dmvector[1:108, 1])
  } else if (climate_var %in% max_cor_vars) {
    dmv <- which.max(dmvector[1:108, 1])
  }
  
  #dmv <- which.max(abs( dmvector[1:108,1]) )
  # Wrap as a one-column data frame named "row" for the lookup below.
  dmv <- as.data.frame(dmv)
  colnames(dmv)[1] <- "row"
  
  #dmvector1a <- max(dmvector$correlations)
  #dmvector1b <- data.frame(which=dmvector, correlations=dmvector[dmvector1a, ])
  
  # Attach a linear row index (1..144) so the winning cell can be looked up
  # by position in the month/lag table.
  monthnumlist2 <- cbind(as.data.frame(1:144), monthnumlist)
  colnames(monthnumlist2)[1] <- "row"
  
  # The single month/lag combination with the best correlation.
  monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
  
  #makeRects <- function(tfMat){
  require(utils)
  # (col, row) coordinates of the winning cell on the 12x9 grid.
  cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row, ]
  
  # Row labels for the 9-row design matrix (ending months Jan-Sep).
  monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
  
  setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_pngs")
  
  #  png(filename=paste(state1, "_", county1, "_", damage1, "_", climate_variable12, "_designmatrix.png", sep=""))
  lblcol <- c(9:1)  # NOTE(review): appears unused in this section -- confirm.
  # Rounded copy of the correlation matrix, intended as heatmap cell labels.
  dmvector3a <- round(dmvector3, digits = 2)
  
  # Global winning-cell coordinates for the commented overlay variant below.
  newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row, ])
  
  # Blank the same invalid month/lag cells in the rounded label matrix.
  # (Replaces the nine unrolled assignments; an unused red/yellow/green
  # palette that was immediately overwritten was also removed.)
  for (r in 1:9) {
    dmvector3a[r, (13 - r):12] <- NA
  }
  
  my_palette <- colorRampPalette(c("darkred", "red", "darkorange", "orange", "yellow", "lightyellow"))(n = 16)
  
  # NOTE(review): dmvector3a is never passed as cellnote=, so notecol/notecex
  # are inert; see the commented call below for the labeled variant.
  nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette,  scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ",  monthnumlist3$combined, sep=""))

 # nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=topo.colors(16), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ",  monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))

Step 2.4. Palmer Drought Severity Index (PDSI) design matrix for Whitman County, WA, wheat insurance loss due to drought

  # --- PDSI design matrix: Whitman County, WA, wheat insurance loss due to
  # drought. Same pipeline as the precipitation matrix above, with the
  # repeated boilerplate condensed (loops replace the unrolled NA fills and
  # the nested if/else min/max selection).
  state_var <- "WA"
  county_var <- "Whitman"
  commodity_var <- "WHEAT"
  damage_var <- "Drought"
  climate_var <- "pdsi"
  predictor_var <- "cube_root_loss"
  
  library(gplots)
  library(plyr)
  library(dplyr)
  #library(plotly)
  #packageVersion('plotly')
  
  #response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
  
  # 144-entry month/lag lookup ("jan1".."dec12") naming the climate CSVs and
  # mapping onto the 12x12 design matrix.
  monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
  numlist <- as.data.frame(rep(1:12, times = 12))
  monthnumlist <- cbind(monthlist, numlist)
  colnames(monthnumlist) <- c("month", "monthcode")
  monthnumlist$combined <- paste0(monthnumlist$month, monthnumlist$monthcode)
  climmonth <- monthnumlist$combined
  
  # Placeholder 12x12 matrix (kept for parity with downstream code).
  designmatrix <- matrix(NA, ncol = 12, nrow = 12)
  
  # Empty length-144 holder for loss/climate correlations.
  dmvector <- as.data.frame(rep(NA, times = 144))
  
  cl <- 0  # linear index into dmvector (1..144)
  for (ppp in climmonth) {
    cl <- cl + 1
    
    # Monthly aggregated climate values for this month/lag combination.
    setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_rev4")
    file1 <- read.csv(paste0(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv"))
    # Column 1 = climate variable, column 2 = year (the join key).
    climatevar <- as.data.frame(cbind(file1[climate_var], file1[2]))
    
    # Annual insurance-loss summaries filtered to this state/county.
    setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_summaries_rev3")
    file2 <- as.data.frame(read.csv(paste0(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv")))
    file2 <- subset(file2, state == state_var)
    file2 <- subset(file2, county == county_var)
    colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var)
    
    # Standardize the climate variable (z-score column appended as column 3).
    climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
    colnames(climatevar)[3] <- "zscore"  # fixed: original indexed a copy (no-op)
    kor <- join(climatevar, file2, by = "year")
    kor2 <- subset(kor, damagecause != "NA")
    colnames(kor2)[3] <- "zscore"
    # NOTE(review): column 9 is positional -- confirm it is the predictor_var
    # column of the joined frame.
    kor3 <- cor(kor2[1], kor2[9])
    
    dmvector[cl, ] <- kor3
  }
  
  dmvector <- as.data.frame(dmvector)
  colnames(dmvector) <- "correlations"
  
  # 144 correlations -> 12x12 matrix (filled by row), flipped vertically,
  # then trimmed to the 9 rows for ending months Jan-Sep.
  dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
  dmvector2 <- dmvector2[nrow(dmvector2):1, ]
  dmvector3 <- dmvector2[4:12, ]
  
  # Blank invalid month/lag cells: row r keeps only columns 1:(12 - r).
  for (r in 1:9) {
    dmvector3[r, (13 - r):12] <- NA
  }
  
  # Re-linearize row-by-row from the bottom row up (rows 9..1), matching the
  # ordering the min/max lookup expects.
  dmvector <- data.frame(dmvector = as.vector(t(dmvector3[9:1, ])))
  
  # Moisture-type variables take the minimum correlation, temperature/PET
  # types the maximum (pdsi -> minimum). An unlisted climate_var leaves dmv
  # unset, as in the original nested chain.
  min_cor_vars <- c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")
  max_cor_vars <- c("tmmn", "pet", "tmmx")
  if (climate_var %in% min_cor_vars) {
    dmv <- which.min(dmvector[1:108, 1])
  } else if (climate_var %in% max_cor_vars) {
    dmv <- which.max(dmvector[1:108, 1])
  }
  
  #dmv <- which.max(abs( dmvector[1:108,1]) )
  dmv <- as.data.frame(dmv)
  colnames(dmv)[1] <- "row"
  
  # Linear row index (1..144) lets us look up the winning month/lag combo.
  monthnumlist2 <- cbind(as.data.frame(1:144), monthnumlist)
  colnames(monthnumlist2)[1] <- "row"
  
  monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
  
  require(utils)
  # (col, row) coordinates of the winning cell on the 12x9 grid.
  cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row, ]
  
  # Row labels for the 9-row design matrix (ending months Jan-Sep).
  monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
  
  setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_pngs")
  
  lblcol <- c(9:1)  # NOTE(review): appears unused in this section -- confirm.
  # Rounded copy of the correlation matrix, intended as heatmap cell labels.
  dmvector3a <- round(dmvector3, digits = 2)
  
  # Global winning-cell coordinates for the commented overlay variant below.
  newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row, ])
  
  # Blank the same invalid month/lag cells in the rounded label matrix.
  for (r in 1:9) {
    dmvector3a[r, (13 - r):12] <- NA
  }
  
  my_palette <- colorRampPalette(c("darkred", "red", "darkorange", "orange", "yellow", "lightyellow"))(n = 16)
  
  # NOTE(review): dmvector3a is never passed as cellnote=, so notecol/notecex
  # are inert; see the commented call below for the labeled variant.
  nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette,  scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ",  monthnumlist3$combined, sep=""))

 # nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=topo.colors(16), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ",  monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))

Step 3. Examining spatial relationships of climate lagged correlations

In step 3, we generate maps of our climate-lagged correlations, for each climate variable. Each county is labeled with the optimum monthly period that has the highest correlation for that climate variable and wheat/drought insurance loss (July 2 = two months previous to July, or May/June/July), along with the correlation value. Each correlation value is absolute (correlation coefficient, R).

Step 3.1. Precipitation time lagged correlations to wheat/drought insurance loss, per county, for the iPNW

library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)

 # Map scenario: precipitation ("pr") vs cube-root wheat/drought loss, with
 # the labeled cell fixed at ending month July with a 2-month lag.
 climate_var <- "pr"
  predictor_var <- "cube_root_loss"
  monthend <- "jul"
  monthnumber <- 2
    
      library(RColorBrewer)
      library(dplyr)
      
      setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_correlations_3/")
      
      # "loss" is stored on disk as "crop_commodity_loss"; map the short name.
      if (predictor_var == "loss") {
        predictor <- "crop_commodity_loss"
      } else {
        predictor <- predictor_var
      }
      
      # One correlation-matrix CSV per county; filename fields are separated
      # by underscores.
      files  <- list.files(pattern = predictor)
      filey <- do.call(rbind, strsplit(files, '[_]'))
      
      # Keep only files for the requested climate variable (field 5).
      filey <- subset(filey, filey[,5] == climate_var)
      
      colnames(filey) <- c("state", "county", "commodity", "damage", "climate", "crop1", "crop2", "response", "crop3")
      filey <- as.data.frame(filey)
      # Reassemble the file names from the parsed fields.
      data <- with(filey, paste(state, "_", county, "_", commodity, "_", damage, "_", climate, "_", crop1, "_", crop2, "_", response, "_", crop3, sep=""))
      
      tables <- lapply(data, read.csv, header = TRUE)
      
      # Drop the CSV index column from each table.
      tables <- lapply(tables, function(x) { x["X"] <- NULL; x })
      
      # Flip rows so Jan is row 1 and Sep is row 9.
      tables <- lapply(tables, function(x) arrange(x, -row_number()))
      
      # Blank invalid month/lag combinations (window would run past the
      # ending month). Generalized from the hard-coded 1:26 county count,
      # and the nine unrolled NA assignments replaced by a loop:
      # row r keeps only columns 1:(r + 2).
      for (i in seq_along(tables)) {
        for (r in 1:9) {
          tables[[i]][r, (r + 3):12] <- NA
        }
      }
      
      # Row index of the requested ending month.
      monthly <- match(monthend, tolower(month.abb))
      
      
      # For each county's lag matrix, find the "best" correlation cell.
      # Moisture variables (pr, rmin, pdsi) take the minimum correlation;
      # the remaining variables take the maximum. (Replaces nine duplicated
      # loops inside a nested if/else chain.)
      # NOTE(review): these rules differ from the Step 2 matrices, where
      # rmax/fm100/fm1000 used the minimum and the minimum-temperature
      # variable was spelled 'tmmn' (here 'tmin'). Behavior is preserved
      # exactly as originally written -- confirm which convention is intended.
      min_cor_vars <- c("pr", "rmin", "pdsi")
      max_cor_vars <- c("rmax", "tmmx", "tmin", "fm100", "fm1000", "pet")
      if (climate_var %in% c(min_cor_vars, max_cor_vars)) {
        pick <- if (climate_var %in% min_cor_vars) min else max
        # bestcounty columns: 1 = row index (ending month), 2 = column index
        # (lag), 3 = the extreme correlation value itself.
        bestcounty <- matrix(NA, nrow = length(tables), ncol = 3)
        for (i in seq_along(tables)) {
          best_val <- pick(tables[[i]], na.rm = TRUE)
          loc <- which(tables[[i]] == best_val, arr.ind = TRUE)
          bestcounty[i, 1] <- loc[1, 1]
          bestcounty[i, 2] <- loc[1, 2]
          bestcounty[i, 3] <- best_val
        }
      }
      
      
      
      
      # Convert the best row index into a month abbreviation. NOTE: assigning
      # character values coerces the whole bestcounty matrix to character.
      bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
      
      # One row per county: name, best ending month, lag column, correlation.
      bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
      colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
      #new
      
      
      
      
      
      #!!!!!!fix-row by column, or number of months by ending month
      # Each county's correlation at the fixed (ending month, lag) cell.
      # NOTE(review): per the author's flag above, the row/column orientation
      # of this lookup should be verified before trusting the mapped values.
      table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
      
      
      # Flatten the per-county values into a one-column data frame.
      table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
      colnames(table3) <- "correlations"
      #combined <- do.call(rbind , tables)
      
      table4 <- cbind(filey, table3)
      
      #if (predictor_var  == "loss") {
      
      #predictor_var <- "crop_commodity_loss"
      
      #}
      
      # Keep county/commodity/damage/climate plus the correlation column.
      table5 <- table4[c(2:5,10)]
      
      colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
      
      #table5$STATE_NAME <-  state.name[match(table5[,1],state.abb)]
      
      
      
      
      
      
      # --- Load the US counties shapefile ONCE and subset to the iPNW study
      # area (the original re-read the same shapefile four times).
      # NOTE(review): readShapePoly() (maptools) is deprecated upstream; kept
      # here for compatibility with the rest of the pipeline.
      setwd("/dmine/data/counties/")
      
      counties <- readShapePoly('UScounties.shp', 
                                proj4string=CRS
                                ("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
      projection = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
      
      statez = c("Idaho", "Washington", "Oregon")
      # Per-state county-name regexes. NOTE(review): "Okananogan" does not
      # match Okanogan County, WA -- confirm whether that county should be
      # included; fixing the spelling would change the study-area selection,
      # so it is preserved as written.
      Idaho_list1 <- paste("Idaho", "Lewis", "Nez Perce", "Latah", "Benewah", sep="|")
      Washington_list1 <- paste("Okananogan", "Douglas", "Grant", "Benton", "Franklin", "Walla Walla", "Adams", "Lincoln", "Spokane", "Whitman", "Columbia", "Garfield", "Asotin", sep="|")
      Oregon_list1 <- paste("Wasco", "Sherman", "Gilliam", "Morrow", "Umatilla", "Union", "Wallowa", sep="|")
      
      
      combinedlist2 <- paste("Okananogan", "Douglas", "Grant", "Benton", "Franklin", "Walla Walla", "Adams", "Lincoln", "Spokane", "Whitman", "Columbia", "Garfield", "Asotin", "Wasco", "Sherman", "Gilliam", "Morrow", "Umatilla", "Union", "Wallowa", "Idaho", "Lewis", "Nez Perce", "Clearwater", "Latah", "Benewah", "Kootenai", sep="|")
      combinedlist <- c(Idaho_list1, Washington_list1, Oregon_list1)
      
      #alllist <- c("Idaho", "Oregon", "Washington")
      
      #--Oregon study-area counties
      or_counties <- counties[grep("Oregon", counties@data$STATE_NAME),]
      palouse_Oregon_counties <- or_counties[grep(Oregon_list1, or_counties@data$NAME),]
      OR_counties <- palouse_Oregon_counties
      
      #--Washington study-area counties
      wa_counties <- counties[grep("Washington", counties@data$STATE_NAME),]
      palouse_Washington_counties <- wa_counties[grep(Washington_list1, wa_counties@data$NAME),]
      WA_counties <- palouse_Washington_counties
      
      #--Idaho study-area counties
      id_counties <- counties[grep("Idaho", counties@data$STATE_NAME),]
      palouse_Idaho_counties <- id_counties[grep(Idaho_list1, id_counties@data$NAME),]
      ID_counties <- palouse_Idaho_counties
      
      kk <- "Idaho"  # preserved: last value set by the original unrolled code
      
      # Combined 24-county study-area polygons.
      counties <- rbind(ID_counties, WA_counties, OR_counties)
      
      
      
      
      # Join per-county correlations (table5) and the best-cell summary
      # (bestcounty2) onto the study-area polygons.
      counties2 <- merge(counties, table5, by = "NAME" )
      
      counties3 <- merge(counties2, bestcounty2, by = "NAME")
      # Label such as "jul2": best window ends in July with a 2-month lag.
      counties3$MONTHCOMBO <- paste0(counties3$MONTH, counties3$ENDMONTH)
      
      # CORRELATION arrives as a factor; convert through its levels to numeric.
      counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
      
      # Color scales over the correlation range (pal2 is the reversed ramp,
      # used for the legend). Replaced the eval(parse(text=...)) indirection
      # with direct column access -- same domain, no string evaluation.
      pal <- colorNumeric(colorRampPalette(c("red", "orange", "yellow", "lightyellow"))(11),
                          na.color = "#ffffff", domain = counties3$CORRELATION)
      pal2 <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11),
                           na.color = "#ffffff", domain = counties3$CORRELATION)
      
      # NOTE(review): myLabelFormat is defined but unused below (the legend
      # calls labelFormat() directly); its reverse_order branch returns the
      # sorted cuts without formatting them -- verify before using it.
      myLabelFormat <- function(..., reverse_order = FALSE) {
        if (reverse_order) {
          function(type = "numeric", cuts) {
            cuts <- sort(cuts, decreasing = TRUE)
          }
        } else {
          labelFormat(...)
        }
      }
      
      #colorss = colorRampPalette(brewer.pal(11,"Spectral"))
      
      #finalcol <- colorss(len <- length(counties3$CORRELATION))
      #finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
      
      #cellselect <- paste(monthend, monthnumber, sep="")
      
      #par(mfrow=c(1,4))
      #layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE)) 
      #par(mar = c(1,1,1,1) + 0.1)
      #plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
      ##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
      
      #added from ID
      #corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
      #text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre,  sep=""), cex=1.5, col = "white", font = 2)
      
      #--
      
      # Geographic extent of the merged study-area polygons; used below to
      # fit the initial leaflet view.
      exte <- extent(counties3)
      
      library(htmltools)
      
      # CSS for a fixed, centered, semi-transparent map title control.
      tag.map.title <- tags$style(HTML("
                                       .leaflet-control.map-title { 
                                       transform: translate(-50%,20%);
                                       position: fixed !important;
                                       left: 50%;
                                       text-align: center;
                                       padding-left: 10px; 
                                       padding-right: 10px; 
                                       background: rgba(255,255,255,0.75);
                                       font-weight: bold;
                                       font-size: 24px;
                                       }
                                       "))
      
      # Title widget (currently not added to the map; see the commented
      # addControl() call in the pipeline below).
      title <- tags$div(
        tag.map.title, HTML(paste("IPNW  Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
      )  

      
      # Polygon centroids, used to place the per-county text labels.
      lat_long <- coordinates(counties3)
      
     
      #labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
   
      
      #counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
      # HTML label per county: correlation value over the best month/lag combo.
      counties3a <- data.frame(counties3)
      labs <- lapply(seq(nrow(counties3a)), function(i) {
        paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
                counties3a[i, "MONTHCOMBO"]) 
      })

      # Choropleth of per-county correlations with centroid text labels and a
      # reversed-order legend.
      map <- leaflet(data = counties3) %>% 
        
        addProviderTiles(providers$Hydda.Base) %>%
        addProviderTiles(providers$Stamen.TonerLines) %>%
        
        #addControl(title, position = "topleft", className="map-title") %>% 
        
        fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
        addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
                                  
                                  'font-family'= 'serif',
                                  'font-style'= 'bold',
                                  'font-size' = '14px'
                                  ))) %>%
        
        addLegend(pal = pal2, values = counties3$CORRELATION,  labels = c("1", "2"), opacity = .5, title = paste("Correlation",  " Matrix", sep="<br>"),
                  position = "bottomright", labFormat = labelFormat(transform = function(x) sort(x, decreasing = TRUE)))
      
      map

Step 3.2. Potential Evapotranspiration time lagged correlations to wheat/drought insurance loss, per county, for the iPNW

 # Map scenario: potential evapotranspiration ("pet") vs cube-root
 # wheat/drought loss; labeled cell fixed at ending month July, 2-month lag.
 climate_var <- "pet"
  predictor_var <- "cube_root_loss"
  monthend <- "jul"
  monthnumber <- 2
    
      library(RColorBrewer)
      library(dplyr)
      
      setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_correlations_3/")
      
      # "loss" is stored on disk as "crop_commodity_loss"; map the short name.
      if (predictor_var == "loss") {
        predictor <- "crop_commodity_loss"
      } else {
        predictor <- predictor_var
      }
      
      # One correlation-matrix CSV per county; filename fields are separated
      # by underscores.
      files  <- list.files(pattern = predictor)
      filey <- do.call(rbind, strsplit(files, '[_]'))
      
      # Keep only files for the requested climate variable (field 5).
      filey <- subset(filey, filey[,5] == climate_var)
      
      colnames(filey) <- c("state", "county", "commodity", "damage", "climate", "crop1", "crop2", "response", "crop3")
      filey <- as.data.frame(filey)
      # Reassemble the file names from the parsed fields.
      data <- with(filey, paste(state, "_", county, "_", commodity, "_", damage, "_", climate, "_", crop1, "_", crop2, "_", response, "_", crop3, sep=""))
      
      tables <- lapply(data, read.csv, header = TRUE)
      
      # Drop the CSV index column from each table.
      tables <- lapply(tables, function(x) { x["X"] <- NULL; x })
      
      # Flip rows so Jan is row 1 and Sep is row 9.
      tables <- lapply(tables, function(x) arrange(x, -row_number()))
      
      # Blank invalid month/lag combinations (window would run past the
      # ending month). Generalized from the hard-coded 1:26 county count,
      # and the nine unrolled NA assignments replaced by a loop:
      # row r keeps only columns 1:(r + 2).
      for (i in seq_along(tables)) {
        for (r in 1:9) {
          tables[[i]][r, (r + 3):12] <- NA
        }
      }
      
      # Row index of the requested ending month.
      monthly <- match(monthend, tolower(month.abb))
      
      
      # For each county, locate the month-window cell whose correlation with
      # wheat/drought loss is strongest.  Variables expected to relate
      # negatively to drought loss (precipitation, minimum relative humidity,
      # PDSI) take the minimum (most negative) correlation; all others
      # (rmax, tmmx, tmin, fm100, fm1000, pet, ...) take the maximum.
      # This replaces nine copy-pasted if/else branches that differed only
      # in the choice of min vs max.
      minimizing_vars <- c("pr", "rmin", "pdsi")
      pick <- if (climate_var %in% minimizing_vars) min else max
      
      # bestcounty columns: 1 = matrix row (mapped to a month name
      # downstream), 2 = matrix column (ending-month index), 3 = the
      # correlation value itself.
      bestcounty <- matrix(NA, nrow = length(tables), ncol = 3)
      for (i in seq_along(tables)) {
        best_val <- pick(tables[[i]], na.rm = TRUE)
        best_cell <- which(tables[[i]] == best_val, arr.ind = TRUE)
        bestcounty[i, 1] <- best_cell[1, 1]
        bestcounty[i, 2] <- best_cell[1, 2]
        bestcounty[i, 3] <- best_val
      }
      
      
      
      
      # Convert the best-cell row index into a lowercase month abbreviation.
      # NOTE: assigning character values coerces the whole matrix (including
      # the correlation column) to character; CORRELATION is converted back
      # to numeric downstream before plotting.
      bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
      
      # Attach county names so the best-correlation table can later be
      # merged onto the county shapefile by NAME.
      bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
      colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
      
      # Extract, for every county, the single correlation at the fixed
      # (monthend, monthnumber) cell chosen at the top of this section.
      # NOTE(review): the original "!!!!!!fix" comment flags the row/column
      # semantics (row = ending month vs. number of months) as unverified --
      # confirm the orientation before relying on this cell selection.
      table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
      
      # one-column data frame of the fixed-cell correlations (one row per county)
      table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
      colnames(table3) <- "correlations"
      #combined <- do.call(rbind , tables)
      
      # combine the parsed file-name fields with the correlations
      table4 <- cbind(filey, table3)
      
      #if (predictor_var  == "loss") {
      
      #predictor_var <- "crop_commodity_loss"
      
      #}
      
      # keep county, commodity, damage, climate, and correlation columns
      table5 <- table4[c(2:5,10)]
      
      colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
      
      #table5$STATE_NAME <-  state.name[match(table5[,1],state.abb)]
      
      
      
      
      
      # Load the US county shapefile ONCE and subset it to the iPNW study
      # area in Idaho, Washington, and Oregon.  (The original re-read the
      # same shapefile four times, once per state plus once unused.)
      # NOTE(review): readShapePoly() (maptools) is deprecated in favor of
      # sf::st_read(); it is kept because downstream code expects sp objects.
      setwd("/dmine/data/counties/")
      
      projection <- CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
      counties <- readShapePoly('UScounties.shp', proj4string = projection)
      
      statez <- c("Idaho", "Washington", "Oregon")
      # Regex alternations of study-area county names, per state.
      # NOTE(review): "Okananogan" appears to be a misspelling of "Okanogan"
      # and will never match a county name -- confirm whether Okanogan
      # County is meant to be in the study area.
      Idaho_list1 <- paste("Idaho", "Lewis", "Nez Perce", "Latah", "Benewah", sep="|")
      Washington_list1 <- paste("Okananogan", "Douglas", "Grant", "Benton", "Franklin", "Walla Walla", "Adams", "Lincoln", "Spokane", "Whitman", "Columbia", "Garfield", "Asotin", sep="|")
      Oregon_list1 <- paste("Wasco", "Sherman", "Gilliam", "Morrow", "Umatilla", "Union", "Wallowa", sep="|")
      
      combinedlist2 <- paste("Okananogan", "Douglas", "Grant", "Benton", "Franklin", "Walla Walla", "Adams", "Lincoln", "Spokane", "Whitman", "Columbia", "Garfield", "Asotin", "Wasco", "Sherman", "Gilliam", "Morrow", "Umatilla", "Union", "Wallowa", "Idaho", "Lewis", "Nez Perce", "Clearwater", "Latah", "Benewah", "Kootenai", sep="|")
      combinedlist <- c(Idaho_list1, Washington_list1, Oregon_list1)
      
      # helper: restrict the shapefile to one state, then to the listed
      # study-area county names within that state
      state_subset <- function(shp, state_name, name_pattern) {
        in_state <- shp[grep(state_name, shp@data$STATE_NAME), ]
        in_state[grep(name_pattern, in_state@data$NAME), ]
      }
      
      ID_counties <- state_subset(counties, "Idaho", Idaho_list1)
      WA_counties <- state_subset(counties, "Washington", Washington_list1)
      OR_counties <- state_subset(counties, "Oregon", Oregon_list1)
      
      # combined study-area polygons, Idaho first to match the original order
      counties <- rbind(ID_counties, WA_counties, OR_counties)
      
      
      
      
      # Join the fixed-cell correlations (table5) and the best-cell results
      # (bestcounty2) onto the study-area polygons by county NAME.
      counties2 <- merge(counties, table5, by = "NAME")
      counties3 <- merge(counties2, bestcounty2, by = "NAME")
      # label text, e.g. "mar7": window start month + ending-month index
      counties3$MONTHCOMBO <- paste0(counties3$MONTH, counties3$ENDMONTH)
      
      # CORRELATION passed through a character matrix, so it arrives as
      # factor or character depending on stringsAsFactors; going through
      # as.character() first is correct in either case (the original
      # as.numeric(levels(f))[f] idiom breaks on plain character columns).
      counties3$CORRELATION <- as.numeric(as.character(counties3$CORRELATION))
      
      # light-yellow-to-red palette over the observed correlation range
      # (direct column access replaces the original eval(parse(text=...)))
      pal <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11),
                          na.color = "#ffffff",
                          domain = counties3$CORRELATION)
      
      exte <- extent(counties3)   # bounding box for the initial map view
      
      library(htmltools)
      
      # CSS for a floating map title (the addControl() call that would use
      # it is currently commented out in the leaflet chain below)
      tag.map.title <- tags$style(HTML("
                                       .leaflet-control.map-title { 
                                       transform: translate(-50%,20%);
                                       position: fixed !important;
                                       left: 50%;
                                       text-align: center;
                                       padding-left: 10px; 
                                       padding-right: 10px; 
                                       background: rgba(255,255,255,0.75);
                                       font-weight: bold;
                                       font-size: 24px;
                                       }
                                       "))
      
      title <- tags$div(
        tag.map.title,
        HTML(paste0("IPNW  Correlation, Climate vs. ", predictor_var,
                    " by County for ", climate_var))
      )
      
      lat_long <- coordinates(counties3)   # county centroids for the labels
      
      # per-county HTML label: correlation value over the month combination
      counties3a <- data.frame(counties3)
      labs <- lapply(seq_len(nrow(counties3a)), function(i) {
        paste0(as.character(round(counties3a[i, "CORRELATION"], 2)), '<br/>',
               counties3a[i, "MONTHCOMBO"])
      })
      
      # Choropleth of the per-county correlations with centroid labels.
      # NOTE(review): the invalid col= argument to labelOptions() and the
      # labels= argument to addLegend() (ignored when pal/values are given)
      # were dropped; rendered output is unchanged.
      map <- leaflet(data = counties3) %>%
        addProviderTiles(providers$Hydda.Base) %>%
        addProviderTiles(providers$Stamen.TonerLines) %>%
        #addControl(title, position = "topleft", className="map-title") %>%
        fitBounds(exte[1], exte[3], exte[2], exte[4]) %>%
        addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8,
                    weight = 2, stroke = TRUE, smoothFactor = 0.5,
                    color = "black") %>%
        addLabelOnlyMarkers(data = counties3,
                            lng = lat_long[, 1], lat = lat_long[, 2],
                            label = lapply(labs, HTML),
                            labelOptions = labelOptions(
                              noHide = TRUE, direction = 'middle',
                              textOnly = TRUE, textsize = "14px",
                              style = list(
                                'font-family' = 'serif',
                                'font-style' = 'bold',
                                'font-size' = '14px'
                              ))) %>%
        addLegend(pal = pal, values = counties3$CORRELATION, opacity = .5,
                  title = paste("Correlation", " Matrix", sep = "<br>"),
                  position = "bottomright")
      
      map

Step 3.3. Maximum Temperature time lagged correlations to wheat/drought insurance loss, per county, for the iPNW

 climate_var <- "tmmx"              # climate variable to map in this section
  predictor_var <- "cube_root_loss"  # response: cube root of wheat/drought loss
  monthend <- "jul"                  # ending month of the fixed time-lag window
  monthnumber <- 2                   # number of months in the fixed window
    
      library(RColorBrewer)
      library(dplyr)
      
      setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_correlations_3/")
      
      # On disk the plain "loss" response is stored under the name
      # "crop_commodity_loss"; every other response uses its own name.
      if (predictor_var == "loss") {
        predictor <- "crop_commodity_loss"
      } else {
        predictor <- predictor_var
      }
      
      # One correlation-matrix CSV per county.  File names encode
      # state_county_commodity_damage_climate_crop1_crop2_response_crop3,
      # separated by underscores.
      files <- list.files(pattern = predictor)
      filey <- do.call(rbind, strsplit(files, '[_]'))
      
      # keep only files for the requested climate variable (5th field)
      filey <- subset(filey, filey[, 5] == climate_var)
      
      colnames(filey) <- c("state", "county", "commodity", "damage", "climate",
                           "crop1", "crop2", "response", "crop3")
      filey <- as.data.frame(filey)
      # reassemble the file names from the parsed fields
      data <- with(filey, paste(state, county, commodity, damage, climate,
                                crop1, crop2, response, crop3, sep = "_"))
      
      tables <- lapply(data, read.csv, header = TRUE)
      
      # drop the CSV index column from each correlation matrix
      tables <- lapply(tables, function(x) { x["X"] <- NULL; x })
      
      # flip the matrix so jan is the 1st row and sep the 9th row
      tables <- lapply(tables, function(x) arrange(x, -row_number()))
      
      # Blank out upper-triangle cells: month-window combinations whose
      # window would extend past the available months are not valid.
      # (Row r keeps columns 1..(r+2); columns (r+3)..12 are masked.)
      for (i in seq_along(tables)) {
        for (r in 1:9) {
          tables[[i]][r, (r + 3):12] <- NA
        }
      }
      
      # row index of the chosen ending month (1 = jan ... 12 = dec)
      monthly <- match(monthend, tolower(month.abb))
      
      
      # For each county, locate the month-window cell whose correlation with
      # wheat/drought loss is strongest.  Variables expected to relate
      # negatively to drought loss (precipitation, minimum relative humidity,
      # PDSI) take the minimum (most negative) correlation; all others
      # (rmax, tmmx, tmin, fm100, fm1000, pet, ...) take the maximum.
      # This replaces nine copy-pasted if/else branches that differed only
      # in the choice of min vs max.
      minimizing_vars <- c("pr", "rmin", "pdsi")
      pick <- if (climate_var %in% minimizing_vars) min else max
      
      # bestcounty columns: 1 = matrix row (mapped to a month name
      # downstream), 2 = matrix column (ending-month index), 3 = the
      # correlation value itself.
      bestcounty <- matrix(NA, nrow = length(tables), ncol = 3)
      for (i in seq_along(tables)) {
        best_val <- pick(tables[[i]], na.rm = TRUE)
        best_cell <- which(tables[[i]] == best_val, arr.ind = TRUE)
        bestcounty[i, 1] <- best_cell[1, 1]
        bestcounty[i, 2] <- best_cell[1, 2]
        bestcounty[i, 3] <- best_val
      }
      
      
      
      
      # Convert the best-cell row index into a lowercase month abbreviation.
      # NOTE: assigning character values coerces the whole matrix (including
      # the correlation column) to character; CORRELATION is converted back
      # to numeric downstream before plotting.
      bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
      
      # Attach county names so the best-correlation table can later be
      # merged onto the county shapefile by NAME.
      bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
      colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
      
      # Extract, for every county, the single correlation at the fixed
      # (monthend, monthnumber) cell chosen at the top of this section.
      # NOTE(review): the original "!!!!!!fix" comment flags the row/column
      # semantics (row = ending month vs. number of months) as unverified --
      # confirm the orientation before relying on this cell selection.
      table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
      
      # one-column data frame of the fixed-cell correlations (one row per county)
      table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
      colnames(table3) <- "correlations"
      #combined <- do.call(rbind , tables)
      
      # combine the parsed file-name fields with the correlations
      table4 <- cbind(filey, table3)
      
      #if (predictor_var  == "loss") {
      
      #predictor_var <- "crop_commodity_loss"
      
      #}
      
      # keep county, commodity, damage, climate, and correlation columns
      table5 <- table4[c(2:5,10)]
      
      colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
      
      #table5$STATE_NAME <-  state.name[match(table5[,1],state.abb)]
      
      
      
      
      
      # Load the US county shapefile ONCE and subset it to the iPNW study
      # area in Idaho, Washington, and Oregon.  (The original re-read the
      # same shapefile four times, once per state plus once unused.)
      # NOTE(review): readShapePoly() (maptools) is deprecated in favor of
      # sf::st_read(); it is kept because downstream code expects sp objects.
      setwd("/dmine/data/counties/")
      
      projection <- CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
      counties <- readShapePoly('UScounties.shp', proj4string = projection)
      
      statez <- c("Idaho", "Washington", "Oregon")
      # Regex alternations of study-area county names, per state.
      # NOTE(review): "Okananogan" appears to be a misspelling of "Okanogan"
      # and will never match a county name -- confirm whether Okanogan
      # County is meant to be in the study area.
      Idaho_list1 <- paste("Idaho", "Lewis", "Nez Perce", "Latah", "Benewah", sep="|")
      Washington_list1 <- paste("Okananogan", "Douglas", "Grant", "Benton", "Franklin", "Walla Walla", "Adams", "Lincoln", "Spokane", "Whitman", "Columbia", "Garfield", "Asotin", sep="|")
      Oregon_list1 <- paste("Wasco", "Sherman", "Gilliam", "Morrow", "Umatilla", "Union", "Wallowa", sep="|")
      
      combinedlist2 <- paste("Okananogan", "Douglas", "Grant", "Benton", "Franklin", "Walla Walla", "Adams", "Lincoln", "Spokane", "Whitman", "Columbia", "Garfield", "Asotin", "Wasco", "Sherman", "Gilliam", "Morrow", "Umatilla", "Union", "Wallowa", "Idaho", "Lewis", "Nez Perce", "Clearwater", "Latah", "Benewah", "Kootenai", sep="|")
      combinedlist <- c(Idaho_list1, Washington_list1, Oregon_list1)
      
      # helper: restrict the shapefile to one state, then to the listed
      # study-area county names within that state
      state_subset <- function(shp, state_name, name_pattern) {
        in_state <- shp[grep(state_name, shp@data$STATE_NAME), ]
        in_state[grep(name_pattern, in_state@data$NAME), ]
      }
      
      ID_counties <- state_subset(counties, "Idaho", Idaho_list1)
      WA_counties <- state_subset(counties, "Washington", Washington_list1)
      OR_counties <- state_subset(counties, "Oregon", Oregon_list1)
      
      # combined study-area polygons, Idaho first to match the original order
      counties <- rbind(ID_counties, WA_counties, OR_counties)
      
      
      
      
      # Join the fixed-cell correlations (table5) and the best-cell results
      # (bestcounty2) onto the study-area polygons by county NAME.
      counties2 <- merge(counties, table5, by = "NAME")
      counties3 <- merge(counties2, bestcounty2, by = "NAME")
      # label text, e.g. "mar7": window start month + ending-month index
      counties3$MONTHCOMBO <- paste0(counties3$MONTH, counties3$ENDMONTH)
      
      # CORRELATION passed through a character matrix, so it arrives as
      # factor or character depending on stringsAsFactors; going through
      # as.character() first is correct in either case (the original
      # as.numeric(levels(f))[f] idiom breaks on plain character columns).
      counties3$CORRELATION <- as.numeric(as.character(counties3$CORRELATION))
      
      # light-yellow-to-red palette over the observed correlation range
      # (direct column access replaces the original eval(parse(text=...)))
      pal <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11),
                          na.color = "#ffffff",
                          domain = counties3$CORRELATION)
      
      exte <- extent(counties3)   # bounding box for the initial map view
      
      library(htmltools)
      
      # CSS for a floating map title (the addControl() call that would use
      # it is currently commented out in the leaflet chain below)
      tag.map.title <- tags$style(HTML("
                                       .leaflet-control.map-title { 
                                       transform: translate(-50%,20%);
                                       position: fixed !important;
                                       left: 50%;
                                       text-align: center;
                                       padding-left: 10px; 
                                       padding-right: 10px; 
                                       background: rgba(255,255,255,0.75);
                                       font-weight: bold;
                                       font-size: 24px;
                                       }
                                       "))
      
      title <- tags$div(
        tag.map.title,
        HTML(paste0("IPNW  Correlation, Climate vs. ", predictor_var,
                    " by County for ", climate_var))
      )
      
      lat_long <- coordinates(counties3)   # county centroids for the labels
      
      # per-county HTML label: correlation value over the month combination
      counties3a <- data.frame(counties3)
      labs <- lapply(seq_len(nrow(counties3a)), function(i) {
        paste0(as.character(round(counties3a[i, "CORRELATION"], 2)), '<br/>',
               counties3a[i, "MONTHCOMBO"])
      })
      
      # Choropleth of the per-county correlations with centroid labels.
      # NOTE(review): the invalid col= argument to labelOptions() and the
      # labels= argument to addLegend() (ignored when pal/values are given)
      # were dropped; rendered output is unchanged.
      map <- leaflet(data = counties3) %>%
        addProviderTiles(providers$Hydda.Base) %>%
        addProviderTiles(providers$Stamen.TonerLines) %>%
        #addControl(title, position = "topleft", className="map-title") %>%
        fitBounds(exte[1], exte[3], exte[2], exte[4]) %>%
        addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8,
                    weight = 2, stroke = TRUE, smoothFactor = 0.5,
                    color = "black") %>%
        addLabelOnlyMarkers(data = counties3,
                            lng = lat_long[, 1], lat = lat_long[, 2],
                            label = lapply(labs, HTML),
                            labelOptions = labelOptions(
                              noHide = TRUE, direction = 'middle',
                              textOnly = TRUE, textsize = "14px",
                              style = list(
                                'font-family' = 'serif',
                                'font-style' = 'bold',
                                'font-size' = '14px'
                              ))) %>%
        addLegend(pal = pal, values = counties3$CORRELATION, opacity = .5,
                  title = paste("Correlation", " Matrix", sep = "<br>"),
                  position = "bottomright")
      
      map

Step 3.4. Palmer Drought Severity Index (PDSI) time lagged correlations to wheat/drought insurance loss, per county, for the iPNW

 climate_var <- "pdsi"              # climate variable to analyze
  predictor_var <- "cube_root_loss"  # loss response used in the file names
  monthend <- "jul"                  # ending month of the selected lag window
  monthnumber <- 2                   # number of months in the selected window
    
      library(RColorBrewer)
      library(dplyr)
      
      setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_correlations_3/")
      
      # File names use "crop_commodity_loss" when the plain "loss" response is
      # requested; otherwise the response name is used verbatim.
      if (predictor_var == "loss") {
        predictor <- "crop_commodity_loss"
      } else {
        predictor <- predictor_var
      }
      
      # List the per-county matrix files for this response, split the names on
      # "_", and keep only the rows for the requested climate variable
      # (the 5th "_"-separated token).
      files <- list.files(pattern = predictor)
      filey <- do.call(rbind, strsplit(files, '[_]'))
      filey <- subset(filey, filey[, 5] == climate_var)
      
      colnames(filey) <- c("state", "county", "commodity", "damage", "climate", "crop1", "crop2", "response", "crop3")
      filey <- as.data.frame(filey)
      # Reassemble the full file names from the parsed tokens.
      data <- with(filey, paste(state, "_", county, "_", commodity, "_", damage, "_", climate, "_", crop1, "_", crop2, "_", response, "_", crop3, sep=""))
      
      tables <- lapply(data, read.csv, header = TRUE)
      
      tables <- lapply(tables, function(x) { x["X"] <- NULL; x })  # drop the written index column
      tables <- lapply(tables, function(x) arrange(x, -row_number()))  # flip so Jan is row 1
      
      # Mask cells that are not valid (window-length, ending-month)
      # combinations: in row r only columns 1:(r + 2) are meaningful, so
      # columns (r + 3):12 are set NA. This replaces nine hand-written
      # assignments and the hard-coded 1:26 county count with a loop over
      # however many county tables were actually read.
      for (i in seq_along(tables)) {
        for (r in 1:9) {
          tables[[i]][r, (r + 3):12] <- NA
        }
      }
      
      # Row index of the requested ending month (1 = jan ... 12 = dec).
      monthly <- match(monthend, tolower(month.abb))
      
      
      # Locate, for every county matrix, the single (window-length,
      # ending-month) cell with the strongest correlation to loss.
      # Variables expected to DECREASE under drought (pr, rmin, pdsi) use the
      # minimum correlation; the remaining variables use the maximum.
      # This replaces nine nearly identical nested if/else branches; as in the
      # original, `bestcounty` is left undefined for any other climate_var.
      min_vars <- c("pr", "rmin", "pdsi")
      max_vars <- c("rmax", "tmmx", "tmin", "fm100", "fm1000", "pet")
      
      if (climate_var %in% c(min_vars, max_vars)) {
        pick <- if (climate_var %in% min_vars) min else max
        n_tables <- length(tables)
        bestcounty <- matrix(NA, nrow = n_tables, ncol = 3)
        for (i in seq_len(n_tables)) {
          extreme <- pick(tables[[i]], na.rm = TRUE)
          cell <- which(tables[[i]] == extreme, arr.ind = TRUE)
          bestcounty[i, 1] <- cell[1, 1]  # matrix row of the best cell
          bestcounty[i, 2] <- cell[1, 2]  # matrix column of the best cell
          bestcounty[i, 3] <- extreme     # the correlation value itself
        }
      }
      
      
      
      
      bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
      
      bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
      colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
      #new
      
      
      
      
      
      #!!!!!!fix-row by column, or number of months by ending month
      table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
      
      
      table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
      colnames(table3) <- "correlations"
      #combined <- do.call(rbind , tables)
      
      table4 <- cbind(filey, table3)
      
      #if (predictor_var  == "loss") {
      
      #predictor_var <- "crop_commodity_loss"
      
      #}
      
      table5 <- table4[c(2:5,10)]
      
      colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
      
      #table5$STATE_NAME <-  state.name[match(table5[,1],state.abb)]
      
      
      
      
      
      # Read the national county shapefile ONCE (it was previously re-read for
      # every state) and subset to the iPNW study counties per state.
      setwd("/dmine/data/counties/")
      
      counties <- readShapePoly('UScounties.shp', 
                                proj4string=CRS
                                ("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
      projection = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
      
      statez = c("Idaho", "Washington", "Oregon")
      # Study-county name patterns per state ("|"-joined for grep).
      # NOTE(review): "Okananogan" looks like a misspelling of "Okanogan" and
      # will never match a county name — confirm against the intended county
      # list before changing it, since downstream code assumes the current
      # county count produced by these patterns.
      Idaho_list1 <- paste("Idaho", "Lewis", "Nez Perce", "Latah", "Benewah", sep="|")
      Washington_list1 <- paste("Okananogan", "Douglas", "Grant", "Benton", "Franklin", "Walla Walla", "Adams", "Lincoln", "Spokane", "Whitman", "Columbia", "Garfield", "Asotin", sep="|")
      Oregon_list1 <- paste("Wasco", "Sherman", "Gilliam", "Morrow", "Umatilla", "Union", "Wallowa", sep="|")
      
      combinedlist2 <- paste("Okananogan", "Douglas", "Grant", "Benton", "Franklin", "Walla Walla", "Adams", "Lincoln", "Spokane", "Whitman", "Columbia", "Garfield", "Asotin", "Wasco", "Sherman", "Gilliam", "Morrow", "Umatilla", "Union", "Wallowa", "Idaho", "Lewis", "Nez Perce", "Clearwater", "Latah", "Benewah", "Kootenai", sep="|")
      combinedlist <- c(Idaho_list1, Washington_list1, Oregon_list1)
      
      # Subset the national layer to one state's study counties.
      palouse_subset <- function(all_counties, state_name, county_pattern) {
        state_rows <- all_counties[grep(state_name, all_counties@data$STATE_NAME), ]
        state_rows[grep(county_pattern, state_rows@data$NAME), ]
      }
      
      # Keep the palouse_* aliases the original created via assign().
      OR_counties <- palouse_Oregon_counties <- palouse_subset(counties, "Oregon", Oregon_list1)
      WA_counties <- palouse_Washington_counties <- palouse_subset(counties, "Washington", Washington_list1)
      ID_counties <- palouse_Idaho_counties <- palouse_subset(counties, "Idaho", Idaho_list1)
      
      # Single layer containing all iPNW study counties.
      counties <- rbind(ID_counties, WA_counties, OR_counties)
      
      
      
      
      # Join the selected correlations onto the county polygons.
      counties2 <- merge(counties, table5, by = "NAME" )
      
      # Attach the best (month-window, correlation) table and build the label
      # code: start month pasted to the ending-month column, e.g. "apr2".
      counties3 <- merge(counties2, bestcounty2, by = "NAME")
      counties3$MONTHCOMBO <- paste(counties3$MONTH, counties3$ENDMONTH, sep="")
      
      # CORRELATION arrives as a factor after the merges; recover the numeric
      # values through its levels.
      counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
      
      # Light-yellow-to-red palette over the observed correlations; NA -> white.
      # (Direct column reference replaces the eval(parse(text = ...)) that
      # previously built the domain.)
      pal <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11),
                          na.color = "#ffffff",
                          domain = counties3$CORRELATION)
      
      # Bounding box of the study counties, used to fit the leaflet view.
      exte <- extent(counties3)
      
      library(htmltools)
      
      # CSS for the (optional) centered map-title control.
      tag.map.title <- tags$style(HTML("
                                       .leaflet-control.map-title { 
                                       transform: translate(-50%,20%);
                                       position: fixed !important;
                                       left: 50%;
                                       text-align: center;
                                       padding-left: 10px; 
                                       padding-right: 10px; 
                                       background: rgba(255,255,255,0.75);
                                       font-weight: bold;
                                       font-size: 24px;
                                       }
                                       "))
      
      title <- tags$div(
        tag.map.title, HTML(paste("IPNW  Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
      )  

      # Polygon centroids, used to place the county labels on the map.
      lat_long <- coordinates(counties3)
      
     
      # Per-county label: the rounded correlation on the first line and the
      # best month-combination code on the second.
      counties3a <- data.frame(counties3)
      make_label <- function(idx) {
        corr_txt <- as.character(round(counties3a[idx, "CORRELATION"], 2))
        paste0(corr_txt, '<br/>', counties3a[idx, "MONTHCOMBO"])
      }
      labs <- lapply(seq_len(nrow(counties3a)), make_label)

      # Shared styling for the always-visible, text-only county labels.
      county_label_opts <- labelOptions(
        noHide = TRUE, direction = 'middle', textOnly = TRUE,
        textsize = "14px", col = "white",
        style = list(
          'font-family' = 'serif',
          'font-style' = 'bold',
          'font-size' = '14px'
        )
      )

      # Choropleth of per-county correlations with labels and a legend.
      map <- leaflet(data = counties3) %>%
        addProviderTiles(providers$Hydda.Base) %>%
        addProviderTiles(providers$Stamen.TonerLines) %>%
        fitBounds(exte[1], exte[3], exte[2], exte[4]) %>%
        addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8,
                    weight = 2, stroke = TRUE, smoothFactor = 0.5,
                    color = "black") %>%
        addLabelOnlyMarkers(data = counties3,
                            lng = lat_long[, 1], lat = lat_long[, 2],
                            label = lapply(labs, HTML),
                            labelOptions = county_label_opts) %>%
        addLegend(pal = pal, values = counties3$CORRELATION,
                  labels = c("1", "2"), opacity = .5,
                  title = paste("Correlation", " Matrix", sep = "<br>"),
                  position = "bottomright")

      # Render the map.
      map

Step 3.2. Precipitation time lagged correlations to wheat/drought insurance loss, per county, for the iPNW

panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) 
{ 
  # Upper-panel function for pairs(): writes |cor(x, y)| in the panel center,
  # with text size proportional to the correlation magnitude.
  saved_usr <- par("usr")
  on.exit(par(saved_usr))
  par(usr = c(0, 1, 0, 1))
  r_abs <- abs(cor(x, y))
  # Format against a reference value so the digit count stays stable.
  label <- format(c(r_abs, 0.123456789), digits = digits)[1]
  label <- paste0(prefix, label)
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(label)
  }
  text(0.5, 0.5, label, cex = cex.cor * r_abs)
}

# Commodity and damage cause examined throughout this section.
var_commodity <- "WHEAT"
var_damage <- "Drought"

#load wheat pricing

#barley <- read.csv("/dmine/data/USDAprices/barleyprices_1988_2017.csv", header=TRUE, strip.white =TRUE)
wheatprice <- read.csv("/dmine/data/USDAprices/wheatprices_1998_2017.csv", header=TRUE, strip.white =TRUE)
# Mean wheat price per year (joined to the loss data later).
wheatprice_year <- aggregate(wheatprice$Price, list(wheatprice$Year), FUN="mean")
colnames(wheatprice_year) <- c("year", "price")

# NOTE(review): wheatprice is replaced here by the revised 1989-2015 series,
# but wheatprice_year above was already computed from the 1998-2017 file —
# confirm which series the downstream joins are meant to use.
wheatprice <- read.csv("/dmine/data/USDAprices/wheat_prices_revised_1989_2015.csv", header=TRUE, strip.white =TRUE)
wheatprice <- wheatprice[-1]

colnames(wheatprice) <- c("year", "price")

#--accessing output of design matrix/time lag data based on monthly selection from dashboard runs

# Read one climate-matrix output CSV and rename its 9th column to
# "<climate-variable>_zscore", where the climate variable name is taken from
# the 2nd column's header. Replaces sixteen duplicated read.csv/colnames pairs.
read_matrix_output <- function(path) {
  d <- read.csv(path)
  colnames(d)[9] <- paste(colnames(d)[2], "_zscore", sep = "")
  d
}

matrix_outdir <- "/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_outputs"

# Correlation outputs (cube-root-loss / cube-root-acres / loss-per-acre).
var1 <- read_matrix_output(file.path(matrix_outdir, "pr_jun2_cube_root_loss_climatecorrelation.csv"))
var2 <- read_matrix_output(file.path(matrix_outdir, "pet_jul3_cube_root_loss_climatecorrelation.csv"))
var3 <- read_matrix_output(file.path(matrix_outdir, "tmmx_jul2_cube_root_loss_climatecorrelation.csv"))
var4 <- read_matrix_output(file.path(matrix_outdir, "pr_jun2_cube_root_acres_climatecorrelation.csv"))
var5 <- read_matrix_output(file.path(matrix_outdir, "pet_jun2_loss_per_acre_climatecorrelation.csv"))
var6 <- read_matrix_output(file.path(matrix_outdir, "tmmx_jun1_cube_root_acres_climatecorrelation.csv"))

# Raw climate-data outputs for the loss response.
var7 <- read_matrix_output(file.path(matrix_outdir, "pr_sep5_loss_climatedata.csv"))
var8 <- read_matrix_output(file.path(matrix_outdir, "pet_sep5_loss_climatedata.csv"))
var9 <- read_matrix_output(file.path(matrix_outdir, "tmmx_jul2_loss_climatedata.csv"))

# Fuel-moisture and PDSI outputs for the cube-root-loss response.
var9x <- read_matrix_output(file.path(matrix_outdir, "fm100_jul3_cube_root_loss_climatedata.csv"))
var10x <- read_matrix_output(file.path(matrix_outdir, "fm1000_aug2_cube_root_loss_climatedata.csv"))
var11x <- read_matrix_output(file.path(matrix_outdir, "pdsi_sep4_cube_root_loss_climatedata.csv"))
var12x <- read_matrix_output(file.path(matrix_outdir, "pdsi_sep4_cube_root_loss_climatecorrelation.csv"))

# Correlation counterparts of the loss climatedata files.
var7a <- read_matrix_output(file.path(matrix_outdir, "pr_sep5_loss_climatecorrelation.csv"))
var8a <- read_matrix_output(file.path(matrix_outdir, "pet_sep5_loss_climatecorrelation.csv"))
var9a <- read_matrix_output(file.path(matrix_outdir, "tmmx_jul2_loss_climatecorrelation.csv"))







# Assemble combined climate/loss frames from the per-variable files.
data1 <- cbind(var1, var2[9], var3[9])
data2 <- cbind(var1[1:6], var2[2], var3[2])

data3 <- cbind(var4[1:6], var5[2], var6[2])

data3 <- plyr::join(data3, wheatprice_year, by = "year")

# Main modeling frame: pr columns plus pet/tmmx/fm100/fm1000/pdsi values and
# the pdsi cube-root-loss correlation column (var12x[3]).
data4 <- cbind(var7[1:6], var8[2], var9[2], var9x[2], var10x[2], var11x[2], var12x[3] )
#data4$prpet <- data4$pr / data4$pet
data4a <- left_join(data4, var7a, by = c("year" = "year", "county" = "county"))
data4a <- left_join(data4a, wheatprice_year, by = c("year"))
data4aa <- na.omit(data4a)

# NOTE(review): this rename is purely positional — if any upstream cbind/join
# changes column order the labels silently mislabel columns; verify against
# str(data4a) if the inputs change.
colnames(data4aa) <- c("X", "pr", "year", "pr_zscore", "damagecause", "county", "pet", "tmmx", "fm100", "fm1000", "pdsi", "cube_loss", "X.y", "pr2", "loss", "state", "commodity", "matrixnumber", "clim_zscore", "loss_zscore", "price")
# Aridity index: precipitation over potential evapotranspiration.
data4aa$prpet <- data4aa$pr / data4aa$pet

# Keep only the modeling columns and persist the optimized dataset.
data4aa <- subset(data4aa, , c(year, damagecause, county, commodity, state, matrixnumber, cube_loss, pr, pdsi, pet, tmmx, fm100, fm1000, price, prpet))
write.csv(data4aa, file = "/dmine/data/USDA/agmesh-scenarios/Allstates/summaries/lag_palouse1.csv")


# Center and scale the main predictors for plotting and modeling.
data4aa$pr_scaled <- scale(data4aa$pr, scale = TRUE, center = TRUE)
data4aa$tmmx_scaled <- scale(data4aa$tmmx, scale = TRUE, center = TRUE)
data4aa$pet_scaled <- scale(data4aa$pet, scale = TRUE, center = TRUE)
data4aa$pdsi_scaled <- scale(data4aa$pdsi, scale = TRUE, center = TRUE)
data4aa$price_scaled <- scale(data4aa$price, scale = TRUE, center = TRUE)


#percentage acreage by county and year, per state


library(plotrix)
library(ggplot2)
library(gridExtra)


# Read every "*_summary" file in a state's summaries directory, total the
# numeric columns within each county-year, then average those yearly totals
# to one mean-annual row per county. Replaces three copy-pasted pipelines.
# The working directory is restored on exit.
state_county_means <- function(summary_dir) {
  old_wd <- setwd(summary_dir)
  on.exit(setwd(old_wd))
  summary_files <- list.files(pattern = "_summary")
  combined <- do.call(rbind, lapply(summary_files, read.csv, header = TRUE))
  yearly <- aggregate(. ~ countyfips + year, combined, sum)
  aggregate(. ~ countyfips, yearly, mean)
}

ID1 <- state_county_means("/dmine/data/USDA/agmesh-scenarios/Idaho/summaries")
WA1 <- state_county_means("/dmine/data/USDA/agmesh-scenarios/Washington/summaries")
OR1 <- state_county_means("/dmine/data/USDA/agmesh-scenarios/Oregon/summaries")

# FIPS-to-name lookup used to attach county and state names.
countiesfips <- read.csv("/dmine/data/counties/counties_fips.csv", header = TRUE)
colnames(countiesfips) <- c("countyfips", "county", "state")

OR2 <- merge(countiesfips, OR1, by=("countyfips"))
ID2 <- merge(countiesfips, ID1, by=("countyfips"))
WA2 <- merge(countiesfips, WA1, by=("countyfips"))

#--Per-state loss/climate assembly (previously three copy-pasted blocks for
#--Oregon, Washington, and Idaho).

# Build the per-county loss/climate table for one state:
#   1. rbind all "<state>_summary*" files from the Allstates directory,
#   2. keep claims from 2001 onward (the original computed but discarded a
#      pre-2001 subset, so only >= 2001 rows ever reached the output),
#   3. total acreage across ALL damage causes for var_commodity,
#   4. total loss and acreage for the var_commodity/var_damage pair,
#   5. join the state's mean-annual climate table (climate_means, column 4
#      dropped as in the original).
# Uses the globals var_commodity and var_damage defined earlier.
state_loss_climate <- function(state_name, climate_means) {
  setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/")
  files <- list.files(pattern = paste(state_name, "_summary", sep = ""))
  claims <- do.call(rbind, lapply(files, read.csv, header = TRUE))
  claims <- as.data.frame(claims)
  claims$county <- as.character(claims$county)
  claims$damagecause <- as.character(claims$damagecause)
  
  claims <- subset(claims, year >= 2001)
  
  claims$commodity <- trimws(claims$commodity)
  claims$county <- trimws(claims$county)
  claims$damagecause <- trimws(claims$damagecause)
  
  # Acreage across all damage causes for the commodity.
  all_commodity <- subset(claims, commodity == var_commodity)
  all_acres <- aggregate(all_commodity$acres,
                         by = list(all_commodity$year, all_commodity$county, all_commodity$state),
                         FUN = "sum")
  colnames(all_acres) <- c("year", "county", "state", "all_damagecause_acres")
  
  # Loss and acreage for the commodity/damage-cause of interest.
  target <- subset(claims, commodity == var_commodity & damagecause == var_damage)
  loss_tab <- aggregate(target$loss,
                        by = list(target$year, target$county, target$state),
                        FUN = "sum")
  acres_tab <- aggregate(target$acres,
                         by = list(target$year, target$county, target$state),
                         FUN = "sum")
  colnames(loss_tab) <- c("year", "county", "state", "loss")
  colnames(acres_tab) <- c("year", "county", "state", "acres")
  
  loss_tab$acres <- acres_tab$acres
  loss_tab$lossperacre <- loss_tab$loss / loss_tab$acres
  
  with_climate <- merge(loss_tab, climate_means[-4], by = c("county", "state"))
  merge(all_acres, with_climate, by = c("year", "county", "state"))
}

OR_loss_climate_2 <- state_loss_climate("Oregon", OR2)
WA_loss_climate_2 <- state_loss_climate("Washington", WA2)
ID_loss_climate_2 <- state_loss_climate("Idaho", ID2)


# Combined iPNW table; pct_acreage is the share of the commodity's claimed
# acreage attributable to the damage cause of interest.
merged_iPNW <- rbind(ID_loss_climate_2, WA_loss_climate_2, OR_loss_climate_2)
merged_iPNW$pct_acreage <- merged_iPNW$acres / merged_iPNW$all_damagecause_acres



#

#--plotting results of individual variables

# Scatterplot matrix of cube-root loss against the scaled climate predictors,
# coloured by state; panel.cor (defined above) prints |r| in the upper panels.
pairs(cube_loss ~ pr_scaled + tmmx_scaled + pet_scaled, data = data4aa, col = data4aa$state,
      lower.panel=panel.smooth, upper.panel=panel.cor, main = "initial pairs plot")

par(mar=c(12.7, 6.1, 4, 2.1), family = 'serif', mgp=c(4, 1, 0), las=0)

# Join the optimized climate/loss table with the per-county acreage summary.
data4aaaa <- merge(data4aa, merged_iPNW, by = c("state", "county", "year"))

# Cube-root loss vs each predictor: point size = wheat price, colour = state,
# with a single loess smooth across all states.
ggplot(data4aaaa, aes(pet_scaled, cube_loss, color = state)) + 
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") + labs(x = "Scaled Potential Evapotranspiration (mm)", y = "Cube root loss ($)") + theme(axis.title.y = element_text(family = "Serif", size=16)) + theme(axis.title.x = element_text(family = "Serif", size=16), axis.text.x = element_text(size=rel(1.5), hjust = 1, family = "Serif"), axis.text.y = element_text(size=rel(1.5), hjust = 1, family = "Serif"))

ggplot(data4aaaa, aes(pr_scaled, cube_loss, color = state)) + 
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") + labs(x = "Scaled Precipitation (mm)", y = "Cube root loss ($)") + theme(axis.title.y = element_text(family = "Serif", size=16)) + theme(axis.title.x = element_text(family = "Serif", size=16), axis.text.x = element_text(size=rel(1.5), hjust = 1, family = "Serif"), axis.text.y = element_text(size=rel(1.5), hjust = 1, family = "Serif"))

ggplot(data4aaaa, aes(tmmx_scaled, cube_loss, color = state)) + 
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") + labs(x = "Scaled Max Temperature (C)", y = "Cube root loss ($)") + theme(axis.title.y = element_text(family = "Serif", size=16)) + theme(axis.title.x = element_text(family = "Serif", size=16), axis.text.x = element_text(size=rel(1.5), hjust = 1, family = "Serif"), axis.text.y = element_text(size=rel(1.5), hjust = 1, family = "Serif"))

ggplot(data4aaaa, aes(prpet, cube_loss, color = state)) + 
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") + labs(x = "Aridity Index (PR / PET)", y = "Cube root loss ($)") + theme(axis.title.y = element_text(family = "Serif", size=16)) + theme(axis.title.x = element_text(family = "Serif", size=16), axis.text.x = element_text(size=rel(1.5), hjust = 1, family = "Serif"), axis.text.y = element_text(size=rel(1.5), hjust = 1, family = "Serif"))

Step 5. Examining conditional relationships of optimized climate/insurance loss data using regression based random forest modeling

# Candidate model formulas for the tree / random forest fits below.
# A literal `y ~ x` already yields a formula object, so as.formula() is not
# needed.

# Z-score response and predictors.
form <- loss_zscore ~ pr_zscore + tmmx_zscore + pet_zscore

# Loss responses (cube-root, raw, and county-joined ".x" column variants).
form2  <- cube_loss ~ pr + tmmx + pet + pdsi + price
form2a <- loss ~ pr + tmmx + pet + price
form2b <- cube_loss ~ pr.x + tmmx.x + pet.x + pdsi.x + price

# Acreage-ratio response: core climate set, and the full predictor set.
# (Earlier full-variable version of form3 retained for reference:)
# form3 <- pct_acreage ~ pr + tmmx + pet + pdsi + sph + srad + vs + th +
#          erc + fm100 + fm1000 + price
form3  <- pct_acreage ~ pr + tmmx + pet + pdsi + price
form3a <- pct_acreage ~ pr + tmmx + pet + pdsi + sph + srad + vs + th +
  erc + fm100 + fm1000 + price

# Assemble the modeling datasets from the optimized climate/loss table.
data44a <- data.frame(data4aaaa)

# Keep state/county/year identifiers alongside the modeling data.
data5a_statecountyear <- cbind.data.frame(data44a$state, data44a$county, data44a$year)
colnames(data5a_statecountyear) <- c("state", "county", "year")

# Convert max temperature from Kelvin to Celsius.
# NOTE(review): offset is 273 rather than 273.15 — presumably deliberate
# rounding; confirm against the gridMET units.
data44a$tmmx.x <- data44a$tmmx.x - 273

# Full predictor table (scaled climate variables + loss responses + state).
data5a <- cbind.data.frame(data44a$year, data44a$cube_loss, data44a$loss, data44a$pr_scaled, data44a$tmmx_scaled, data44a$fm100.x, data44a$fm1000.x, data44a$pdsi_scaled, data44a$erc, data44a$srad, data44a$vs, data44a$sph, data44a$th, data44a$pet_scaled, data44a$pct_acreage, data44a$price_scaled, data44a$state)
colnames(data5a) <- c("year", "cube_loss", "loss", "pr", "tmmx", "fm100", "fm1000", "pdsi", "erc", "srad", "vs", "sph", "th", "pet", "pct_acreage", "price", "state" )

data5a <- data.frame(data5a)
# Log-transform the raw loss response (dollar amounts are heavily skewed).
data5a$loss <- log10(data5a$loss)

# Reduced table for the loss-response models (form2a predictors only).
data5ab <- cbind.data.frame(data5a$loss, data5a$pr, data5a$tmmx, data5a$pdsi, data5a$pet, data5a$price)
colnames(data5ab) <- c("loss", "pr", "tmmx", "pdsi", "pet", "price")
data5ab <- data.frame(data5ab)

# BUG FIX: data5ab$loss is copied from data5a$loss, which is already
# log10-transformed above; the original code applied log10() a second time
# here, double-transforming the response used by model_loss. The redundant
# second transform has been removed.

# Load modeling libraries.
library(caret)
library(rpart)

# 10-fold cross-validation (repeatedcv), keeping the hold-out predictions.
train_control <- trainControl(
  method = "repeatedcv",
  number = 10,
  savePredictions = TRUE
)

# Drop incomplete rows before model fitting.
data5b <- na.omit(data.frame(data5a))

data5ab <- na.omit(data.frame(data5ab))






# (retained) exploratory pairs plot of loss vs. climate predictors:
# pairs(loss ~ pr + tmmx + pet + pdsi, data = data5b, col = data5b$state,
#       lower.panel = panel.smooth, upper.panel = panel.cor,
#       main = "initial pairs plot")

# Reproducible 75/25 train/test splits, stratified on each outcome.
set.seed(9992)
# (retained) simple random 80% split alternative:
# trainIndex <- sample(1:nrow(data5b), 0.8 * nrow(data5b))

trainIndex <- caret::createDataPartition(data5b$pct_acreage, p = 0.75, list = FALSE)
train <- data5b[trainIndex, ]
test  <- data5b[-trainIndex, ]

trainIndex_loss <- caret::createDataPartition(data5ab$loss, p = 0.75, list = FALSE)
train_loss <- data5ab[trainIndex_loss, ]
test_loss  <- data5ab[-trainIndex_loss, ]

# (retained) x/y wrapper frames, unused:
# traindata <- data.frame(x = train_loss, y = (train_loss))
# testdata  <- data.frame(x = test_loss,  y = (test_loss))

# Candidate mtry values for random forest tuning.
rf_grid <- expand.grid(mtry = 2:5)


# Fit the random forest / rpart models.
# BUG FIX: randomForest's argument is `ntree`, not `ntrees`. The misspelled
# `ntrees` was silently absorbed by `...` and ignored, so every forest fell
# back to the default of 500 trees — in particular, model_loss never got its
# intended 1000 trees. Also T -> TRUE (T is reassignable).
model <- caret::train(form3, data = data5b, trControl = train_control,
                      importance = TRUE, method = "rf",
                      tuneGrid = rf_grid, ntree = 500)
model_singular <- caret::train(form3, data = train,
                               trControl = train_control, method = "rpart")

model_loss <- caret::train(form2a, data = train_loss, trControl = train_control,
                           method = "rf", tuneGrid = rf_grid, ntree = 1000,
                           importance = TRUE)
model_loss_all <- caret::train(form2a, data = data5b, trControl = train_control,
                               method = "rf", ntree = 500, tuneGrid = rf_grid,
                               importance = TRUE)
model_loss_all_acreage <- caret::train(form3, data = data5b,
                                       trControl = train_control, method = "rf",
                                       ntree = 500, tuneGrid = rf_grid,
                                       importance = TRUE)

#cforest_model_loss <- cforest(form2a,
 #                                    data = train_loss, 
  #                                   controls=cforest_unbiased(ntree=2000, mtry=5))


#lattice::densityplot(model_loss,
 #           adjust = 1.25)

# BUG FIX: model_loss is a regression forest, so finalModel$err.rate is NULL
# (err.rate exists only for classification randomForest objects); indexing
# NULL[, 1] errors. Use the per-tree out-of-bag MSE vector instead, matching
# the $mse usage further below.
tree_num <- which(model_loss$finalModel$mse == min(model_loss$finalModel$mse))

# Learning-curve data for the tuned random forest: refit at increasing
# fractions of the training data, recording train vs. resampled RMSE.
# NOTE: `learing_curve_dat` (sic) is the actual exported caret function name.
lda_data <- learing_curve_dat(dat = model$trainingData, 
                              outcome = ".outcome",
                              ## `train` arguments:
                              metric = "RMSE",
                              trControl = train_control,
                              method = "rf")
## Training for 10% (n = 34)
## Training for 20% (n = 69)
## Training for 30% (n = 104)
## Training for 40% (n = 139)
## Training for 50% (n = 174)
## Training for 60% (n = 208)
## Training for 70% (n = 243)
## Training for 80% (n = 278)
## Training for 90% (n = 313)
## Training for 100% (n = 348)
# y-axis breaks at "pretty" RMSE values (manual alternative kept below).
pts <- pretty(lda_data$RMSE)
#pts <- c(0,0.1, 0.2, 0.3, 0.4)

# Relabel the resampling curve as "Validation" for the plot legend.
lda_data$Data[lda_data$Data == "Resampling"] <- c("Validation")
# Train vs. validation RMSE as a function of training-set size (LOESS-smoothed).
ggplot(lda_data, aes(x = Training_Size, y = RMSE, color = Data)) + 
  geom_smooth(method = loess, span = .8) + 
  theme_bw()  + ggtitle("Random Forest Learning Curves: Train vs. Validation") + theme(axis.title.y = element_text(family = "Serif", size=18), axis.title.x = element_text(family = "Serif", size = 18), axis.text.x = element_text(size=rel(1.9), angle = 90, hjust = 1, family = "Serif"), axis.text.y = element_text(size=rel(1.9), hjust = 1, family = "Serif")) + theme(plot.title = element_text(family = "Serif", vjust = 2))  + theme(legend.text=element_text(family = "Serif", size=14)) + theme(legend.title=element_text(family = "Serif", size=16, face = "bold")) + theme(plot.title = element_text(size=24, face = "bold")) + scale_y_continuous(labels = pts, breaks = pts ) + xlab("Training Size") + ylab("RMSE") + theme(legend.position="bottom") + scale_fill_discrete(name = "Legend")

# RMSE at the best-performing forest size (minimum out-of-bag MSE over trees).
sqrt(min(model_loss$finalModel$mse))
## [1] 0.06389775
# Number of trees at which the minimum out-of-bag MSE occurs.
which.min(model_loss$finalModel$mse)
## [1] 96

# Permutation variable importance (unscaled), ordered most to least important.
importance <- varImp(model_loss, scale = FALSE)

importance2 <- importance$importance
importance2$variable <- rownames(importance2)
importance2 <- importance2[order(importance2$Overall, decreasing = TRUE), ]

# Serif fonts, wide left margin for variable names.
par(mar = c(6, 7, 2, 3), family = "serif", mgp = c(5, 1, 0), las = 0)

# Horizontal bars, least important at the bottom (reverse of the descending
# order above, keeping each bar paired with its own variable name).
barplot(rev(importance2$Overall),
        horiz = TRUE, col = "lightblue",
        ylab = "predictor variables", xlab = "% importance",
        cex.lab = 1.7, las = 2, cex.axis = 1.8, cex.names = 1.8,
        names.arg = rev(importance2$variable))

#NRMSE

#' Normalized root-mean-square error.
#'
#' @param obs  Numeric vector of observed values.
#' @param pred Numeric vector of predictions, same length as obs.
#' @param type Normalization: "sd" (default), "mean", "maxmin"
#'             (observed range), or "iq" (observed interquartile range).
#' @return NRMSE rounded to 3 decimal places.
#'
#' BUG FIX: the original assigned `nrmse` only for valid types and checked
#' `type` *after* the assignments, so an invalid type printed "Wrong type!"
#' and then crashed with "object 'nrmse' not found". Validation now happens
#' up front via switch()'s default branch, which stops with a clear error.
nrmse_func <- function(obs, pred, type = "sd") {
  stopifnot(length(obs) == length(pred))
  rmse <- sqrt(mean((obs - pred)^2))
  denom <- switch(type,
    sd     = sd(obs),
    mean   = mean(obs),
    maxmin = max(obs) - min(obs),
    # unname(): quantile() attaches "75%" as a name, which would otherwise
    # propagate onto the returned value.
    iq     = unname(quantile(obs, 0.75) - quantile(obs, 0.25)),
    stop("Wrong type!", call. = FALSE)
  )
  round(rmse / denom, 3)
}


# Ensemble of learners via caretEnsemble.
library(caretEnsemble)

# Candidate algorithms. (Fuzzy-rule learners from an earlier run retained
# for reference:)
# algorithmList <- c('gbm', 'rpart', 'ctree', 'rf', 'HYFIS', 'FS.HGD', 'ANFIS')
algorithmList <- c("gbm", "rpart", "ctree", "rf")

# Fit each algorithm on the cube-root loss formula with shared CV folds.
set.seed(100)
models <- caretList(form2, data = data5b, trControl = train_control,
                    methodList = algorithmList)
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    45571.9967            -nan     0.1000 1347.8847
##      2    43706.1157            -nan     0.1000 1473.7796
##      3    42226.9439            -nan     0.1000  933.0998
##      4    41057.1838            -nan     0.1000 1054.8961
##      5    40036.4588            -nan     0.1000  746.8285
##      6    38827.0407            -nan     0.1000 1072.6552
##      7    37822.7236            -nan     0.1000  783.4970
##      8    37099.1738            -nan     0.1000  717.0261
##      9    36431.3838            -nan     0.1000  283.0984
##     10    35924.4774            -nan     0.1000  249.2798
##     20    31157.0867            -nan     0.1000 -116.8874
##     40    28225.5251            -nan     0.1000 -109.2071
##     60    27181.9690            -nan     0.1000  -90.3829
##     80    26374.5114            -nan     0.1000  -29.7355
##    100    25522.8726            -nan     0.1000 -127.3438
##    120    24878.5051            -nan     0.1000 -113.4964
##    140    24283.1354            -nan     0.1000  -97.6861
##    150    24055.7699            -nan     0.1000  -78.9351
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    44185.0623            -nan     0.1000 2953.7198
##      2    41611.0650            -nan     0.1000 2339.1270
##      3    39369.8265            -nan     0.1000 2040.5440
##      4    37446.5200            -nan     0.1000 1838.6510
##      5    35922.9177            -nan     0.1000 1237.2045
##      6    34402.5021            -nan     0.1000  885.3945
##      7    33096.8173            -nan     0.1000  999.2937
##      8    32197.6779            -nan     0.1000  546.7953
##      9    31190.0078            -nan     0.1000  651.8482
##     10    30491.3652            -nan     0.1000  722.5687
##     20    26739.7081            -nan     0.1000  137.5739
##     40    23120.5943            -nan     0.1000 -383.4063
##     60    21247.2739            -nan     0.1000  -62.3426
##     80    19683.3442            -nan     0.1000 -109.9295
##    100    18017.2047            -nan     0.1000 -149.0337
##    120    16793.3256            -nan     0.1000 -157.3972
##    140    16088.8012            -nan     0.1000  -54.3143
##    150    15766.3020            -nan     0.1000 -102.4167
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    44070.5813            -nan     0.1000 2963.5876
##      2    41216.2205            -nan     0.1000 2033.6613
##      3    38967.6054            -nan     0.1000 1766.3968
##      4    37234.2776            -nan     0.1000 1170.2250
##      5    35207.8928            -nan     0.1000 1769.7125
##      6    33495.2884            -nan     0.1000 1245.8306
##      7    32280.8803            -nan     0.1000  931.3760
##      8    31133.3247            -nan     0.1000  585.1639
##      9    30333.9086            -nan     0.1000  685.1851
##     10    29757.1657            -nan     0.1000   72.4121
##     20    25082.3239            -nan     0.1000  -60.7711
##     40    20432.9142            -nan     0.1000 -135.6398
##     60    17726.1503            -nan     0.1000 -166.1295
##     80    15840.0190            -nan     0.1000 -225.1396
##    100    14296.4368            -nan     0.1000  -40.5236
##    120    13037.5497            -nan     0.1000 -155.7841
##    140    12271.6360            -nan     0.1000  -21.9658
##    150    11782.2423            -nan     0.1000 -169.3482
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    49820.0430            -nan     0.1000 1944.1522
##      2    47995.6635            -nan     0.1000 1720.9042
##      3    46197.4986            -nan     0.1000 1428.4555
##      4    44717.6850            -nan     0.1000 1179.4257
##      5    43388.9852            -nan     0.1000 1023.4499
##      6    42262.9960            -nan     0.1000  793.3409
##      7    41223.4931            -nan     0.1000  586.2131
##      8    40359.1044            -nan     0.1000  527.2202
##      9    39487.7539            -nan     0.1000  900.8536
##     10    38714.6003            -nan     0.1000  449.8300
##     20    33774.7019            -nan     0.1000  205.4066
##     40    29897.3338            -nan     0.1000  -71.6875
##     60    28539.4470            -nan     0.1000 -167.2461
##     80    27614.5896            -nan     0.1000  -77.5812
##    100    26575.7723            -nan     0.1000    3.3054
##    120    25802.7776            -nan     0.1000  -73.3432
##    140    25007.5663            -nan     0.1000  -18.1882
##    150    24656.0787            -nan     0.1000  -76.9476
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    49245.4193            -nan     0.1000 1552.7467
##      2    46525.2431            -nan     0.1000 1994.4411
##      3    43876.5626            -nan     0.1000 2447.1646
##      4    41877.0815            -nan     0.1000 2029.6277
##      5    40338.5542            -nan     0.1000 1625.4020
##      6    38590.2914            -nan     0.1000 1570.3231
##      7    37038.8184            -nan     0.1000 1126.2944
##      8    36150.6687            -nan     0.1000  666.6455
##      9    35138.5735            -nan     0.1000  669.4421
##     10    34205.2666            -nan     0.1000  638.4603
##     20    28000.7642            -nan     0.1000  143.6927
##     40    23684.2034            -nan     0.1000  106.0152
##     60    21269.8980            -nan     0.1000   94.1118
##     80    19723.9114            -nan     0.1000 -107.9941
##    100    18198.9753            -nan     0.1000 -171.8609
##    120    17033.4741            -nan     0.1000 -198.6796
##    140    16038.6785            -nan     0.1000  -52.4454
##    150    15615.6507            -nan     0.1000  -78.9264
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    47851.6317            -nan     0.1000 3568.1534
##      2    44385.5476            -nan     0.1000 2176.7334
##      3    41505.2606            -nan     0.1000 2394.2301
##      4    39199.2061            -nan     0.1000 1619.8689
##      5    37878.0577            -nan     0.1000  725.9094
##      6    35567.1437            -nan     0.1000 1063.3310
##      7    34246.3304            -nan     0.1000 1087.2448
##      8    33184.6546            -nan     0.1000  616.6300
##      9    32051.9341            -nan     0.1000  417.1295
##     10    30754.0509            -nan     0.1000  560.2631
##     20    25191.4991            -nan     0.1000  162.4201
##     40    20706.3906            -nan     0.1000 -246.4121
##     60    17362.4661            -nan     0.1000  298.4624
##     80    15394.9053            -nan     0.1000  -90.3433
##    100    13863.7916            -nan     0.1000 -124.0408
##    120    12649.7822            -nan     0.1000 -278.2822
##    140    11600.7768            -nan     0.1000 -131.9622
##    150    11157.7773            -nan     0.1000   25.2477
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    51864.4320            -nan     0.1000 1974.3892
##      2    50140.9346            -nan     0.1000 1465.6597
##      3    48422.3850            -nan     0.1000 1137.2598
##      4    47015.4662            -nan     0.1000 1158.7479
##      5    45681.6114            -nan     0.1000 1025.4910
##      6    44408.3090            -nan     0.1000  -23.0852
##      7    43255.4710            -nan     0.1000  770.0290
##      8    42430.4781            -nan     0.1000  757.5815
##      9    41760.0340            -nan     0.1000  561.5739
##     10    40969.3087            -nan     0.1000  184.5594
##     20    36529.4554            -nan     0.1000  194.4991
##     40    33769.0520            -nan     0.1000   53.6747
##     60    32239.6848            -nan     0.1000 -359.1541
##     80    30848.3755            -nan     0.1000  -35.9471
##    100    29671.8747            -nan     0.1000 -198.4972
##    120    28971.2586            -nan     0.1000 -133.9165
##    140    28144.8109            -nan     0.1000 -262.3592
##    150    27718.0092            -nan     0.1000 -188.2970
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    50676.0844            -nan     0.1000 2330.7197
##      2    48339.3231            -nan     0.1000 1570.6150
##      3    46553.5177            -nan     0.1000 1083.7795
##      4    44545.6800            -nan     0.1000 1181.0808
##      5    42804.0484            -nan     0.1000 1833.3006
##      6    41432.8633            -nan     0.1000 1290.0212
##      7    40167.7777            -nan     0.1000  597.1119
##      8    39015.7996            -nan     0.1000  891.1397
##      9    38129.1749            -nan     0.1000  -12.7496
##     10    37022.8829            -nan     0.1000  706.1514
##     20    32248.3341            -nan     0.1000   15.5678
##     40    27441.2026            -nan     0.1000 -166.0665
##     60    25095.1842            -nan     0.1000  -72.5908
##     80    23005.0404            -nan     0.1000  -92.4357
##    100    21356.2008            -nan     0.1000  -36.8375
##    120    19982.1154            -nan     0.1000   56.1846
##    140    18800.4898            -nan     0.1000 -172.8866
##    150    18121.5940            -nan     0.1000 -100.8612
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    50499.9147            -nan     0.1000 2961.5835
##      2    47637.9450            -nan     0.1000 2133.2219
##      3    45032.5800            -nan     0.1000 2214.2762
##      4    42967.5519            -nan     0.1000 1010.4827
##      5    41782.9977            -nan     0.1000  690.9631
##      6    40226.1644            -nan     0.1000 1024.6175
##      7    39028.2233            -nan     0.1000  799.9360
##      8    37830.2803            -nan     0.1000  185.3153
##      9    36618.9748            -nan     0.1000  604.2677
##     10    35746.8661            -nan     0.1000  236.7734
##     20    30077.5272            -nan     0.1000 -116.9216
##     40    22726.2573            -nan     0.1000  -29.2178
##     60    20388.6205            -nan     0.1000   79.9329
##     80    18194.2445            -nan     0.1000  -24.0247
##    100    16210.6387            -nan     0.1000  -74.5641
##    120    14837.1060            -nan     0.1000   -7.2506
##    140    13489.5962            -nan     0.1000  -88.4818
##    150    13045.7364            -nan     0.1000 -177.1660
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    51685.8297            -nan     0.1000 1915.6448
##      2    49856.8534            -nan     0.1000 1541.1501
##      3    48050.8276            -nan     0.1000 1095.9056
##      4    46661.7100            -nan     0.1000  993.9875
##      5    45110.4678            -nan     0.1000  603.9911
##      6    43407.2590            -nan     0.1000  686.6371
##      7    42157.8470            -nan     0.1000  605.2237
##      8    41369.5002            -nan     0.1000  702.1686
##      9    40587.0253            -nan     0.1000  322.3502
##     10    39736.1717            -nan     0.1000  374.2463
##     20    35686.5504            -nan     0.1000  250.2065
##     40    32946.3518            -nan     0.1000  -19.5912
##     60    31533.4954            -nan     0.1000  -37.2919
##     80    30449.1279            -nan     0.1000 -202.9466
##    100    29524.4770            -nan     0.1000 -257.7878
##    120    28674.5226            -nan     0.1000  -61.7317
##    140    27843.3906            -nan     0.1000  -23.3820
##    150    27497.7295            -nan     0.1000 -156.4054
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    50743.9863            -nan     0.1000 3287.2097
##      2    47621.7099            -nan     0.1000 2033.1226
##      3    45462.3824            -nan     0.1000 2113.3776
##      4    42968.8532            -nan     0.1000 1845.1285
##      5    41234.1718            -nan     0.1000 1672.6022
##      6    39922.7531            -nan     0.1000  777.6905
##      7    38232.6350            -nan     0.1000  983.2204
##      8    37194.5285            -nan     0.1000  586.9659
##      9    36532.3462            -nan     0.1000 -104.9967
##     10    35935.8858            -nan     0.1000  326.9653
##     20    30511.4831            -nan     0.1000   50.6200
##     40    26809.4283            -nan     0.1000  -59.2592
##     60    25078.9213            -nan     0.1000 -118.7054
##     80    22529.5604            -nan     0.1000 -204.9102
##    100    20759.1184            -nan     0.1000 -183.3041
##    120    19353.7905            -nan     0.1000  -42.0783
##    140    18037.6204            -nan     0.1000 -246.9130
##    150    17392.3890            -nan     0.1000  -69.0175
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    50298.5415            -nan     0.1000 2881.0502
##      2    47295.9871            -nan     0.1000 2548.9872
##      3    44774.2123            -nan     0.1000 2143.4224
##      4    42790.2301            -nan     0.1000 1503.6932
##      5    40836.1872            -nan     0.1000 1539.4657
##      6    38845.1884            -nan     0.1000 1110.9245
##      7    37737.9741            -nan     0.1000  737.1635
##      8    36472.2379            -nan     0.1000  915.2609
##      9    35280.4969            -nan     0.1000  772.7292
##     10    34792.3238            -nan     0.1000    5.6171
##     20    28027.5999            -nan     0.1000    6.2075
##     40    23131.1276            -nan     0.1000 -134.2211
##     60    20440.4965            -nan     0.1000 -144.1386
##     80    17513.8661            -nan     0.1000 -109.5384
##    100    15600.5567            -nan     0.1000 -108.8007
##    120    14427.7950            -nan     0.1000 -265.1689
##    140    13543.9577            -nan     0.1000  -32.5880
##    150    12852.1341            -nan     0.1000 -139.7807
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    48126.4645            -nan     0.1000 1501.0104
##      2    46333.6397            -nan     0.1000 1766.3519
##      3    44878.2794            -nan     0.1000  815.6747
##      4    43607.8743            -nan     0.1000  934.5319
##      5    42268.9662            -nan     0.1000 1139.2679
##      6    41098.7479            -nan     0.1000  865.7983
##      7    39939.7220            -nan     0.1000  911.5263
##      8    39020.5703            -nan     0.1000  632.0114
##      9    38117.2252            -nan     0.1000  809.5149
##     10    37398.4297            -nan     0.1000  259.7864
##     20    32862.1363            -nan     0.1000  130.6194
##     40    29818.5887            -nan     0.1000   67.7131
##     60    28699.8411            -nan     0.1000   -1.6072
##     80    27639.9586            -nan     0.1000 -111.4352
##    100    26690.7274            -nan     0.1000 -240.2014
##    120    26077.4213            -nan     0.1000 -108.0136
##    140    25359.1170            -nan     0.1000 -135.8198
##    150    24963.8677            -nan     0.1000  -44.6817
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    46994.3787            -nan     0.1000 2366.0756
##      2    44782.7195            -nan     0.1000 1625.6221
##      3    42630.7941            -nan     0.1000 2010.9028
##      4    40380.3140            -nan     0.1000 1907.5240
##      5    38511.5059            -nan     0.1000 1347.3762
##      6    37158.2141            -nan     0.1000 1040.7794
##      7    35944.9188            -nan     0.1000  988.5578
##      8    34918.5429            -nan     0.1000  601.4898
##      9    33907.2410            -nan     0.1000  563.7247
##     10    33129.0498            -nan     0.1000  433.7560
##     20    29021.1943            -nan     0.1000 -209.3625
##     40    25838.2679            -nan     0.1000 -117.5195
##     60    22995.6265            -nan     0.1000   86.7847
##     80    21129.8014            -nan     0.1000  -73.0168
##    100    19722.1231            -nan     0.1000 -355.7565
##    120    18483.3564            -nan     0.1000 -103.9658
##    140    17421.1086            -nan     0.1000 -129.3636
##    150    17071.7623            -nan     0.1000 -114.5686
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    46583.1900            -nan     0.1000 3013.0219
##      2    43623.0206            -nan     0.1000 2192.9443
##      3    40720.1256            -nan     0.1000 2256.8098
##      4    38533.5971            -nan     0.1000 1941.2532
##      5    36799.8214            -nan     0.1000 1534.8973
##      6    34950.5173            -nan     0.1000 1249.4438
##      7    33560.2806            -nan     0.1000 1054.6028
##      8    32402.4008            -nan     0.1000  516.6160
##      9    31519.8047            -nan     0.1000  231.2806
##     10    30756.4211            -nan     0.1000  257.5617
##     20    25891.9783            -nan     0.1000 -240.3685
##     40    22095.3850            -nan     0.1000  262.8500
##     60    18425.8008            -nan     0.1000 -164.1895
##     80    16429.0818            -nan     0.1000   12.8938
##    100    14747.9318            -nan     0.1000 -118.2120
##    120    13549.6807            -nan     0.1000  -95.2072
##    140    12588.2735            -nan     0.1000 -153.4402
##    150    12136.6014            -nan     0.1000 -111.2775
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    50540.4276            -nan     0.1000 1122.2727
##      2    48809.0721            -nan     0.1000 1734.7050
##      3    47228.1668            -nan     0.1000 1511.8777
##      4    45984.2442            -nan     0.1000  945.9184
##      5    44843.0796            -nan     0.1000 1090.0309
##      6    43687.9661            -nan     0.1000  927.2358
##      7    42652.2804            -nan     0.1000  543.1433
##      8    41764.9691            -nan     0.1000  727.9153
##      9    41175.1984            -nan     0.1000  243.5311
##     10    40476.1859            -nan     0.1000  237.0239
##     20    36086.0535            -nan     0.1000  -77.1676
##     40    32614.4843            -nan     0.1000  -62.6594
##     60    31140.0904            -nan     0.1000 -163.5978
##     80    29723.5330            -nan     0.1000 -276.2569
##    100    28577.7222            -nan     0.1000  -97.7706
##    120    27670.2153            -nan     0.1000   28.6113
##    140    26716.8465            -nan     0.1000 -331.2817
##    150    26247.9876            -nan     0.1000 -161.0587
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    48706.3864            -nan     0.1000 2390.7164
##      2    46080.9831            -nan     0.1000 2422.7456
##      3    43791.0307            -nan     0.1000 1645.3846
##      4    41792.3412            -nan     0.1000 1734.6349
##      5    40375.2333            -nan     0.1000  303.1323
##      6    38850.9110            -nan     0.1000  976.8961
##      7    37775.4334            -nan     0.1000  413.0602
##      8    37078.1328            -nan     0.1000  148.1620
##      9    36319.7583            -nan     0.1000  273.4414
##     10    35650.7549            -nan     0.1000   -2.0334
##     20    30371.7856            -nan     0.1000  134.4697
##     40    25595.8159            -nan     0.1000  -20.7030
##     60    23189.9930            -nan     0.1000 -167.1957
##     80    21084.4241            -nan     0.1000  -33.5400
##    100    19651.7523            -nan     0.1000 -110.3095
##    120    18386.2839            -nan     0.1000 -123.0467
##    140    17378.5990            -nan     0.1000  -37.2071
##    150    16862.1094            -nan     0.1000  -87.3071
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    48583.5167            -nan     0.1000 2289.4732
##      2    46259.7320            -nan     0.1000 1239.5100
##      3    43481.7110            -nan     0.1000 1916.4020
##      4    41348.3128            -nan     0.1000 1686.7583
##      5    39755.5499            -nan     0.1000 1178.2566
##      6    38288.2426            -nan     0.1000 1038.9520
##      7    37265.1784            -nan     0.1000  117.9867
##      8    36424.7195            -nan     0.1000  113.3802
##      9    35466.1783            -nan     0.1000  541.9288
##     10    33928.7039            -nan     0.1000  698.8113
##     20    28592.9046            -nan     0.1000  477.9799
##     40    22426.2264            -nan     0.1000 -116.1006
##     60    19145.6228            -nan     0.1000 -169.7116
##     80    17472.3659            -nan     0.1000  -63.1810
##    100    15883.9436            -nan     0.1000 -151.8144
##    120    14781.6956            -nan     0.1000 -133.8376
##    140    13625.5894            -nan     0.1000 -166.3918
##    150    13181.0354            -nan     0.1000 -181.0526
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    50106.0131            -nan     0.1000 1696.6948
##      2    48191.2897            -nan     0.1000 1840.1048
##      3    46678.1575            -nan     0.1000 1144.9227
##      4    44994.1250            -nan     0.1000 1294.7915
##      5    43859.9590            -nan     0.1000 1043.4872
##      6    42721.4887            -nan     0.1000 1008.3102
##      7    41551.5071            -nan     0.1000  887.4136
##      8    40607.1356            -nan     0.1000  563.1316
##      9    39692.4938            -nan     0.1000  689.6011
##     10    38994.9483            -nan     0.1000  425.9485
##     20    34261.0672            -nan     0.1000   20.0803
##     40    31412.4529            -nan     0.1000    1.4015
##     60    30182.8418            -nan     0.1000 -139.6747
##     80    29024.6574            -nan     0.1000 -111.9641
##    100    28060.2630            -nan     0.1000  -26.4089
##    120    27075.6381            -nan     0.1000 -121.2704
##    140    26410.0445            -nan     0.1000  -99.3217
##    150    25981.1889            -nan     0.1000  -91.5102
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    48822.2234            -nan     0.1000 3020.4579
##      2    46739.3718            -nan     0.1000 1684.9822
##      3    44863.0229            -nan     0.1000 1398.0539
##      4    42862.2758            -nan     0.1000 1814.2240
##      5    41185.2553            -nan     0.1000 1427.4025
##      6    39796.3477            -nan     0.1000 1122.1133
##      7    39197.4201            -nan     0.1000   -6.5360
##      8    37975.3088            -nan     0.1000  810.2737
##      9    37131.1218            -nan     0.1000  463.4211
##     10    35961.9594            -nan     0.1000  739.0829
##     20    31371.4613            -nan     0.1000   15.1374
##     40    26856.5886            -nan     0.1000 -470.7550
##     60    23997.1852            -nan     0.1000   15.4109
##     80    22148.8598            -nan     0.1000 -166.0397
##    100    20262.2854            -nan     0.1000   10.1045
##    120    19117.1424            -nan     0.1000 -120.2211
##    140    18043.4686            -nan     0.1000 -167.5913
##    150    17687.5431            -nan     0.1000 -156.1959
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    48207.6763            -nan     0.1000 3399.9230
##      2    45565.7518            -nan     0.1000 1774.9105
##      3    43225.0273            -nan     0.1000 1888.4744
##      4    41338.5027            -nan     0.1000 1524.8722
##      5    39702.5578            -nan     0.1000  849.5830
##      6    37829.7666            -nan     0.1000  856.1821
##      7    36576.8931            -nan     0.1000  980.9611
##      8    35303.1557            -nan     0.1000  347.1572
##      9    34273.0471            -nan     0.1000  833.8494
##     10    33208.5038            -nan     0.1000  681.6445
##     20    28287.5197            -nan     0.1000    2.2072
##     40    23908.0702            -nan     0.1000 -311.4130
##     60    20591.3780            -nan     0.1000  -79.1894
##     80    18095.3402            -nan     0.1000 -280.3663
##    100    16473.2902            -nan     0.1000 -174.8572
##    120    15156.7878            -nan     0.1000 -301.8304
##    140    13597.8447            -nan     0.1000  -18.9977
##    150    13120.4740            -nan     0.1000  -12.3918
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    51830.8773            -nan     0.1000 1805.7643
##      2    50179.4826            -nan     0.1000 1743.5426
##      3    48445.8826            -nan     0.1000  772.0854
##      4    47094.7543            -nan     0.1000 1349.9851
##      5    45884.6074            -nan     0.1000  801.6917
##      6    44450.8913            -nan     0.1000  934.7534
##      7    43431.3147            -nan     0.1000  334.5053
##      8    42173.5079            -nan     0.1000  839.5433
##      9    41221.8725            -nan     0.1000  670.2029
##     10    40396.3274            -nan     0.1000  750.4961
##     20    35950.9093            -nan     0.1000 -258.0982
##     40    32688.0820            -nan     0.1000   62.3145
##     60    31067.8602            -nan     0.1000  -70.5081
##     80    30067.4492            -nan     0.1000   48.0768
##    100    29032.0504            -nan     0.1000  -60.4241
##    120    28425.4374            -nan     0.1000 -235.4553
##    140    27544.9104            -nan     0.1000 -245.2624
##    150    27231.9356            -nan     0.1000  -84.1448
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    50631.5334            -nan     0.1000 3136.8134
##      2    47947.1471            -nan     0.1000 2766.3509
##      3    45297.8075            -nan     0.1000 1319.4048
##      4    43663.6449            -nan     0.1000  968.4453
##      5    42004.6954            -nan     0.1000 1402.9205
##      6    40228.5311            -nan     0.1000 1230.7467
##      7    39146.2099            -nan     0.1000  626.9079
##      8    38220.5020            -nan     0.1000  712.5858
##      9    37328.6846            -nan     0.1000  626.8824
##     10    36636.3856            -nan     0.1000  303.4536
##     20    31825.2242            -nan     0.1000 -336.6848
##     40    26891.8195            -nan     0.1000  -42.2311
##     60    24228.7427            -nan     0.1000  280.3432
##     80    22203.2760            -nan     0.1000  -82.9231
##    100    20541.5620            -nan     0.1000 -151.4118
##    120    19451.7714            -nan     0.1000 -136.2038
##    140    18384.2445            -nan     0.1000  -76.2965
##    150    18117.6406            -nan     0.1000 -133.0888
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    50377.0804            -nan     0.1000 2816.5664
##      2    46603.6247            -nan     0.1000 2325.3695
##      3    44128.7254            -nan     0.1000 1676.0059
##      4    41159.6245            -nan     0.1000 1556.7815
##      5    39604.3524            -nan     0.1000 1525.1722
##      6    38168.2757            -nan     0.1000  832.6823
##      7    37191.7778            -nan     0.1000  832.8172
##      8    36081.0959            -nan     0.1000  921.2319
##      9    35068.7790            -nan     0.1000  118.5416
##     10    34448.2212            -nan     0.1000  356.0382
##     20    28234.6485            -nan     0.1000  692.2787
##     40    23136.5929            -nan     0.1000   24.4731
##     60    19988.2068            -nan     0.1000 -107.7626
##     80    17712.0669            -nan     0.1000 -342.1278
##    100    15672.7356            -nan     0.1000 -163.6213
##    120    14293.3516            -nan     0.1000   18.5972
##    140    13248.2712            -nan     0.1000 -151.6262
##    150    12889.4167            -nan     0.1000 -128.1727
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    52214.3858            -nan     0.1000 1600.9756
##      2    50758.2696            -nan     0.1000 1489.6673
##      3    49054.2707            -nan     0.1000 1327.9872
##      4    47301.3685            -nan     0.1000  399.8762
##      5    45768.8592            -nan     0.1000 1493.4214
##      6    44803.9991            -nan     0.1000  640.4816
##      7    43416.7276            -nan     0.1000  979.7663
##      8    42273.2864            -nan     0.1000  812.1099
##      9    41199.6027            -nan     0.1000  817.1695
##     10    40503.4363            -nan     0.1000  610.6940
##     20    35883.9462            -nan     0.1000   12.4933
##     40    32670.8970            -nan     0.1000  -64.3296
##     60    31260.5558            -nan     0.1000 -155.4968
##     80    30466.2838            -nan     0.1000   26.1934
##    100    29598.0159            -nan     0.1000  -86.8621
##    120    28718.5199            -nan     0.1000  -24.5701
##    140    27830.5249            -nan     0.1000 -122.2310
##    150    27507.0052            -nan     0.1000  -66.8147
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    50980.7464            -nan     0.1000 2729.4021
##      2    47966.8431            -nan     0.1000 2385.4670
##      3    45573.2955            -nan     0.1000 1952.0186
##      4    43220.3331            -nan     0.1000 1580.5582
##      5    41316.0711            -nan     0.1000 1520.8372
##      6    40110.7788            -nan     0.1000  758.2270
##      7    38984.2587            -nan     0.1000  759.9578
##      8    37643.8809            -nan     0.1000  885.7060
##      9    36460.0558            -nan     0.1000  392.5879
##     10    35747.5470            -nan     0.1000  588.0000
##     20    31109.7799            -nan     0.1000    1.1121
##     40    26587.3364            -nan     0.1000 -188.9186
##     60    24433.8542            -nan     0.1000 -126.0805
##     80    22663.1829            -nan     0.1000 -324.6072
##    100    20799.8432            -nan     0.1000 -292.5516
##    120    19617.6967            -nan     0.1000  -97.9638
##    140    18541.7271            -nan     0.1000 -220.0425
##    150    18079.2632            -nan     0.1000 -120.0637
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    50567.2677            -nan     0.1000 3023.6364
##      2    47319.1588            -nan     0.1000 2752.2541
##      3    44516.0487            -nan     0.1000 2124.5583
##      4    41905.7002            -nan     0.1000 2060.9382
##      5    40179.1403            -nan     0.1000 1320.4154
##      6    38508.0668            -nan     0.1000 1288.6671
##      7    37192.0612            -nan     0.1000  983.6842
##      8    35956.5975            -nan     0.1000 1152.0748
##      9    35008.1840            -nan     0.1000  712.7755
##     10    33932.6291            -nan     0.1000  529.6448
##     20    29487.5137            -nan     0.1000  -39.4151
##     40    23306.9090            -nan     0.1000 -125.2123
##     60    20043.4544            -nan     0.1000   96.4604
##     80    17568.1606            -nan     0.1000   10.8940
##    100    15994.1306            -nan     0.1000 -137.7734
##    120    14658.3272            -nan     0.1000 -230.2337
##    140    13399.2697            -nan     0.1000 -116.7425
##    150    13042.2759            -nan     0.1000 -153.0592
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    52881.8877            -nan     0.1000 1935.0225
##      2    50680.5827            -nan     0.1000 1500.2538
##      3    48891.7757            -nan     0.1000 1301.7054
##      4    47271.9588            -nan     0.1000 1005.6993
##      5    46078.4382            -nan     0.1000 1167.7233
##      6    44925.5762            -nan     0.1000  933.9795
##      7    44169.0260            -nan     0.1000  414.3433
##      8    43275.4132            -nan     0.1000  775.0406
##      9    42500.9548            -nan     0.1000  531.5001
##     10    41575.8996            -nan     0.1000  575.8893
##     20    36562.6467            -nan     0.1000  162.4247
##     40    33053.7907            -nan     0.1000   86.8902
##     60    31770.8933            -nan     0.1000 -126.8804
##     80    30600.9333            -nan     0.1000 -262.2830
##    100    29786.9128            -nan     0.1000 -109.8036
##    120    28770.9526            -nan     0.1000 -128.7233
##    140    27977.2441            -nan     0.1000  -75.8393
##    150    27615.3638            -nan     0.1000  -93.0683
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    51945.3416            -nan     0.1000 2617.8573
##      2    49202.4023            -nan     0.1000 2311.8246
##      3    47081.7829            -nan     0.1000  845.9174
##      4    44618.7349            -nan     0.1000 2260.0250
##      5    42958.5527            -nan     0.1000 1590.3314
##      6    41228.2994            -nan     0.1000 1502.5759
##      7    40117.9901            -nan     0.1000  852.9022
##      8    38837.6802            -nan     0.1000  856.0611
##      9    37757.7404            -nan     0.1000  766.7507
##     10    36462.9150            -nan     0.1000  903.3834
##     20    31600.2785            -nan     0.1000  239.3625
##     40    27456.3945            -nan     0.1000 -219.2974
##     60    24743.5370            -nan     0.1000 -174.8742
##     80    22866.9126            -nan     0.1000 -233.4481
##    100    21181.6762            -nan     0.1000 -241.3507
##    120    20037.0629            -nan     0.1000 -149.7849
##    140    18687.5030            -nan     0.1000  -54.0579
##    150    18268.0262            -nan     0.1000  -89.2640
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    51184.7485            -nan     0.1000 2755.7692
##      2    48073.5169            -nan     0.1000 2796.8160
##      3    45597.9915            -nan     0.1000 1730.4158
##      4    43298.5591            -nan     0.1000 2129.3832
##      5    41250.3610            -nan     0.1000 1335.1225
##      6    39513.5446            -nan     0.1000 1248.0633
##      7    37845.9931            -nan     0.1000  307.0052
##      8    36804.6634            -nan     0.1000  938.8930
##      9    35755.1542            -nan     0.1000  365.6346
##     10    35030.1046            -nan     0.1000  329.6958
##     20    30317.2463            -nan     0.1000   37.7080
##     40    24363.4596            -nan     0.1000  -45.7571
##     60    20303.1326            -nan     0.1000 -142.1876
##     80    18382.4430            -nan     0.1000 -301.7427
##    100    16498.3040            -nan     0.1000 -273.2904
##    120    15164.8977            -nan     0.1000 -142.7262
##    140    13670.2717            -nan     0.1000 -101.2531
##    150    13219.4634            -nan     0.1000 -123.7589
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1    48970.3523            -nan     0.1000 2699.7400
##      2    45935.1251            -nan     0.1000 1891.2065
##      3    43202.1596            -nan     0.1000 2145.8962
##      4    41403.1738            -nan     0.1000 1861.6645
##      5    39585.5690            -nan     0.1000 1241.6523
##      6    38050.0881            -nan     0.1000  768.8151
##      7    36930.1179            -nan     0.1000  215.5665
##      8    35853.4215            -nan     0.1000  872.2806
##      9    35110.9422            -nan     0.1000   73.1868
##     10    34049.9195            -nan     0.1000  440.8312
##     20    28281.5111            -nan     0.1000 -233.3057
##     40    22285.6492            -nan     0.1000  -57.1597
##     60    19437.9398            -nan     0.1000  -11.4153
##     80    17700.1227            -nan     0.1000  -78.2389
##    100    16117.7705            -nan     0.1000  -61.4669
##    120    14606.6936            -nan     0.1000 -109.2797
##    140    13710.3714            -nan     0.1000  -62.3653
##    150    13070.2473            -nan     0.1000  -92.4734
# Collect the cross-validation resampling results from the trained caret
# models (gbm, rpart, ctree, rf) so their RMSE and R-squared distributions
# can be compared on the same resamples.
results <- resamples(models)
summary(results)
## 
## Call:
## summary.resamples(object = results)
## 
## Models: gbm, rpart, ctree, rf 
## Number of resamples: 10 
## 
## RMSE 
##           Min.  1st Qu.   Median     Mean  3rd Qu.     Max. NA's
## gbm   127.3188 139.1512 157.1177 170.8398 204.6460 242.6639    0
## rpart 156.5146 178.5645 188.0079 203.0930 229.8374 280.6922    0
## ctree 133.6222 156.6227 185.8617 186.2049 205.1202 244.1956    0
## rf    118.7657 142.4115 149.5901 171.4107 206.9578 260.1863    0
## 
## Rsquared 
##            Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## gbm   0.2532567 0.4275054 0.4645965 0.4551981 0.5239915 0.5571124    0
## rpart 0.0385494 0.1692423 0.2150594 0.2258874 0.2677210 0.4457296    0
## ctree 0.1168496 0.2926411 0.3938881 0.3661205 0.4316656 0.5766137    0
## rf    0.2164554 0.3738006 0.4712877 0.4517680 0.5416886 0.6109469    0
# Fix the RNG seed so the ensemble stacking resamples are reproducible.
set.seed(101)
# Reuse the same resampling scheme as the base models — presumably
# `train_control` is the 10-fold CV control defined earlier; TODO confirm.
stackControl <- train_control

# Ensemble the predictions of `models` to form a new combined prediction based on glm
stack.glm <- caretStack(models, method="glm", trControl=stackControl)
print(stack.glm)
## A glm ensemble of 2 base models: gbm, rpart, ctree, rf
## 
## Ensemble results:
## Generalized Linear Model 
## 
## 348 samples
##   4 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times) 
## Summary of sample sizes: 312, 313, 313, 312, 314, 312, ... 
## Resampling results:
## 
##   RMSE      Rsquared 
##   170.8445  0.4519159
## 
## 
# make predictions
# Predicted percent-acreage values for every county/year row in data5b.
predictions<- predict(model,data5b)

# Predicted wheat/drought loss values (back-transformed with 10^ further
# below, so presumably modeled on a log10 scale — TODO confirm upstream).
predictions_loss <- predict(model_loss,data5b)

#cforest_Prediction <- predict(cforest_model_loss, newdata=test_loss, OOB=TRUE, type = "response")



# Predicted loss values from the all-damage-causes model.
predictions_loss_all <- predict(model_loss_all,data5b)


#--end ensemble



# append predictions
# Attach each prediction vector to the predictor frame, then prepend the
# state/county/year identifier columns to form the final analysis frames.
mydat<- cbind(data5b,predictions)
mydat_loss <- cbind(data5b,predictions_loss)
#cforest_mydat_loss <- cbind(test_loss,cforest_Prediction)

mydat_loss_all <- cbind(data5b,predictions_loss_all)
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)

 
# Drop the two counties excluded from the per-county evaluation, then build
# an empty results frame (one row per remaining county) to hold RMSE and R2.
finaldat <- subset(finaldat, county != "Kootenai" & county != "Clearwater")
countyvector <- unique(finaldat$county)
# Size the frame from the data rather than hard-coding 24 rows: after the
# subset above fewer than 24 counties may remain, and a length mismatch
# makes the county-column assignment below fail.
county_rmse_r2 <- data.frame(matrix(ncol = 2, nrow = length(countyvector)))
county_rmse_r2$county <- countyvector
colnames(county_rmse_r2) <- c("rmse", "r2", "county")


# Per-county evaluation of the percent-acreage model: for each county,
# compute R2/RMSE/MAE of predicted vs. historical pct_acreage and plot the
# two time series for 2001-2015.
ii = 0
for (i in countyvector ) {

# Row index into county_rmse_r2 for the current county.
ii <- ii + 1

countyplot <- subset(finaldat, county == i)


# 2.1. Average of actual data
avr_y_actual <- mean(countyplot$pct_acreage)

# 2.2. Total sum of squares
ss_total <- sum((countyplot$pct_acreage - avr_y_actual)^2)

# 2.3. Regression sum of squares (computed but not used below)
ss_regression <- sum((countyplot$predictions - avr_y_actual)^2)

# 2.4. Residual sum of squares
ss_residuals <- sum((countyplot$pct_acreage - countyplot$predictions)^2)

# 3. R2 Score (1 - SSres/SStot; can be negative when predictions are worse
# than the mean)
r2 <- round(1 - ss_residuals / ss_total, 2)

# Root mean squared error. NOTE(review): loop-invariant definition,
# redefined on every iteration.
RMSE = function(m, o){
  sqrt(mean((m - o)^2))
}

# Mean absolute error (also loop-invariant).
MAE <- function(m, o)
{
    mean(abs(m - o))
}

rmse <- round(RMSE(countyplot$predictions, countyplot$pct_acreage), 2)
mae <- round(MAE(countyplot$predictions, countyplot$pct_acreage), 2)


# Store per-county metrics (col 1 = rmse, col 2 = r2 per colnames above).
county_rmse_r2[ii,1] <-  rmse
county_rmse_r2[ii,2] <- r2

# NOTE(review): these three assignments reassign the GLOBAL finaldat frames
# on every iteration, undoing the Kootenai/Clearwater subset applied before
# the loop; NRMSE1/NRMSE2 and fit1 are loop-invariant and recomputed each
# pass (they are recomputed again after the loop as well).
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)

# NOTE(review): this compares loss against the pct_acreage predictions —
# verify this pairing is intentional.
NRMSE1 <- nrmse_func(finaldat$loss, finaldat$predictions)

NRMSE2 <- nrmse_func(finaldat_loss$loss, finaldat_loss$predictions_loss)


# Linear model on form3 (defined elsewhere); loop-invariant.
fit1 <- lm(form3, data=data5b)

#pct_acreage historical vs. predicted

# Pad to the full 2001-2015 range; years without claims become 0.
yearlist <- data.frame(c(2001:2015))
colnames(yearlist) <- "year"

countyplot <- merge(countyplot, yearlist, by=("year"), all=T)
countyplot$predictions[is.na(countyplot$predictions)] <- 0
countyplot$pct_acreage[is.na(countyplot$pct_acreage)] <- 0


par(mar=c(5, 5.1, 2, 3), family = 'serif', mgp=c(3.8, 1, 0), las=0)


# Historical (red) vs. predicted (blue) percent acreage for this county.
# NOTE(review): state[1] is read AFTER the merge; counties with no 2001
# claim get an NA state in the padded first row, so the title may show NA.
plot(c(countyplot$pct_acreage) ~ c(countyplot$year), cex.lab = 1.5, cex.axis = 1.5, col = "red", 
     las = 2, xaxt = "n", xlab = "Year", ylab = "Percent Acreage Drought Claims", main = paste(i, " County, ", countyplot$state[1], ": R2 = ", r2, " RMSE = ", rmse, sep=""))

lines(countyplot$pct_acreage ~ countyplot$year, col = "red")
points(countyplot$predictions ~ countyplot$year, col = "blue")
lines(countyplot$predictions ~ countyplot$year, col = "blue")

axis(1, countyplot$year, 2001:2015, col.axis = "black", las = 2, cex.axis = 1.5)

legend("topright", legend=c("historical", "predicted"),
       col=c("red", "blue"), lty=1, cex=1.5)

}

# Rebuild the full (unsubsetted) prediction frames, then compute normalized
# RMSE for the percent-acreage and loss models over all counties and years.
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)

# NOTE(review): pairs loss with the pct_acreage predictions — confirm this
# pairing is intentional.
NRMSE1 <- nrmse_func(finaldat$loss, finaldat$predictions)

NRMSE2 <- nrmse_func(finaldat_loss$loss, finaldat_loss$predictions_loss)


 #loss historical vs predicted

# Study-area-wide comparison of historical vs. predicted annual loss totals.
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)


# Back-transform loss and predictions to dollars (presumably stored on a
# log10 scale upstream — TODO confirm). Done in place, so this block must
# run exactly once per rebuild of finaldat_loss.
finaldat_loss$loss <- 10^finaldat_loss$loss
finaldat_loss$predictions_loss <- 10^finaldat_loss$predictions_loss
# Annual totals (summed over all counties) for historical and predicted loss.
fd_loss <- aggregate(finaldat_loss$loss, by=list(finaldat_loss$year), FUN = "sum")
fd_pred <- aggregate(finaldat_loss$predictions_loss, by=list(finaldat_loss$year), FUN = "sum")


# 2.1. Average of actual data
avr_y_actual <- mean(fd_loss$x)

# 2.2. Total sum of squares
ss_total <- sum((fd_loss$x - avr_y_actual)^2)

# 2.3. Regression sum of squares (computed but not used below)
ss_regression <- sum((fd_pred$x - avr_y_actual)^2)

# 2.4. Residual sum of squares
ss_residuals <- sum((fd_loss$x - fd_pred$x)^2)

# 3. R2 Score
r2 <- round(1 - ss_residuals / ss_total, 2)

# Root mean squared error of model values `m` against observations `o`.
RMSE <- function(m, o) {
  squared_resid <- (m - o)^2
  sqrt(mean(squared_resid))
}

rmse <- round(RMSE(fd_pred$x, fd_loss$x), 2)

#plot all counties combined, by year
par(mar=c(6, 5.1, 2, 3), family = 'serif', mgp=c(3.5, .6, 0), las=0)


# Historical (red) vs. predicted (blue) annual wheat/drought loss, summed
# over the full study area, with R2/RMSE in the title.
plot(fd_loss$x ~ fd_loss$Group.1, col = "red", cex.lab = 1.5, cex.axis = 1.3, las = 2, yaxt = "n", xaxt = "n", xlab = "Year", ylab = "Annual Wheat/Drought Loss", main = paste("24 county iPNW study area: R2 = ", r2, " RMSE = ", rmse, sep=""))
lines(fd_loss$x ~ fd_loss$Group.1, col = "red")
points(fd_pred$x ~ fd_pred$Group.1, col = "blue")
lines(fd_pred$x ~ fd_pred$Group.1, col = "blue")

axis(1, fd_loss$Group.1, 2001:2015, col.axis = "black", las = 2, cex.axis = 1.3)

# Label the y axis in millions. Derive the labels from the tick positions
# themselves rather than a second pretty() call on the scaled data:
# pretty(x) and pretty(x / 1e6) are not guaranteed to return the same
# number of breaks, which would misalign tick positions and labels.
pts2 <- pretty(fd_loss$x)
axis(2, las = 1, cex.axis = 1.3, at = pts2, labels = paste(pts2 / 1000000, "M", sep = ""))



legend("topleft", legend=c("historical", "predicted"),
       col=c("red", "blue"), lty=1, cex=1.3)

# Rebuild the prediction frames, then back-transform the loss columns to
# dollars (presumably stored on a log10 scale upstream — TODO confirm).
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)

finaldat_loss$loss <- 10^finaldat_loss$loss
finaldat_loss$predictions_loss <- 10^finaldat_loss$predictions_loss

# Drop the two excluded counties and size the per-county results frame from
# the data rather than hard-coding 24 rows: a length mismatch between
# countyvector and the frame makes the county-column assignment fail.
finaldat_loss <- subset(finaldat_loss, county != "Kootenai" & county != "Clearwater")
countyvector <- unique(finaldat_loss$county)
county_rmse_r2 <- data.frame(matrix(ncol = 2, nrow = length(countyvector)))
county_rmse_r2$county <- countyvector
colnames(county_rmse_r2) <- c("rmse", "r2", "county")


# Per-county evaluation of the loss model: for each county, compute
# R2/RMSE/MAE of predicted vs. historical dollar losses and plot the two
# time series for 2001-2015.

# Error metrics — loop-invariant, so defined once before the loop.
# Root mean squared error of model values m against observations o.
RMSE = function(m, o){
  sqrt(mean((m - o)^2))
}

# Mean absolute error.
MAE <- function(m, o)
{
    mean(abs(m - o))
}

# Complete list of study years, used to pad counties with missing claim
# years (also loop-invariant).
yearlist <- data.frame(c(2001:2015))
colnames(yearlist) <- "year"

ii = 0
for (i in countyvector ) {

# Row index into county_rmse_r2 for the current county.
ii <- ii + 1

countyplot <- subset(finaldat_loss, county == i)

# Capture the county's state BEFORE the year merge: merge(all = TRUE) adds
# NA-state rows for years without claims. The original hard-coded
# "County, Oregon" in every title, mislabeling the Washington and Idaho
# counties; use the county's actual state, as the pct_acreage loop does.
countystate <- countyplot$state[1]

# Pad to the full 2001-2015 range; years without claims become 0 loss.
countyplot <- merge(countyplot, yearlist, by=("year"), all=T)
countyplot$predictions_loss[is.na(countyplot$predictions_loss)] <- 0
countyplot$loss[is.na(countyplot$loss)] <- 0

# 2.1. Average of actual data
avr_y_actual <- mean(countyplot$loss)

# 2.2. Total sum of squares
ss_total <- sum((countyplot$loss - avr_y_actual)^2)

# 2.3. Regression sum of squares (computed but not used below)
ss_regression <- sum((countyplot$predictions_loss - avr_y_actual)^2)

# 2.4. Residual sum of squares
ss_residuals <- sum((countyplot$loss - countyplot$predictions_loss)^2)

# 3. R2 Score
r2 <- round(1 - ss_residuals / ss_total, 2)

rmse <- round(RMSE(countyplot$predictions_loss, countyplot$loss), 2)
mae <- round(MAE(countyplot$predictions_loss, countyplot$loss), 2)

# Store per-county metrics (col 1 = rmse, col 2 = r2 per colnames above).
county_rmse_r2[ii,1] <-  rmse
county_rmse_r2[ii,2] <- r2


# Historical (red) vs. predicted (blue) annual loss for this county.
par(mar=c(6, 5.1, 2, 3), family = 'serif', mgp=c(3.5, .6, 0), las=0)



plot(c(countyplot$loss) ~ c(countyplot$year), cex.lab = 1.5, cex.axis = 1.5, col = "red", 
     las = 2, yaxt = "n", xaxt = "n", xlab = "Year", ylab = "Wheat/Drought Insurance Loss", main = paste(i, " County, ", countystate, ": R2 = ", r2, " RMSE = ", rmse, sep="") )

lines(countyplot$loss ~ countyplot$year, col = "red")
points(countyplot$predictions_loss ~ countyplot$year, col = "blue")
lines(countyplot$predictions_loss ~ countyplot$year, col = "blue")


axis(1, countyplot$year, 2001:2015, col.axis = "black", las = 2, cex.axis = 1.5)

# Label the y axis in millions, deriving labels from the tick positions so
# positions and labels always align (a second pretty() call on scaled data
# can return a different number of breaks).
pts2 <- pretty(countyplot$loss)
axis(2, las = 1, cex.axis = 1.3, at = pts2, labels = paste(pts2 / 1000000, "M", sep = ""))

legend("topleft", legend=c("historical", "predicted"),
       col=c("red", "blue"), lty=1, cex=1.3)

}